mirror of
https://github.com/opencv/opencv.git
video: moved CSRT tracking algorithm from opencv_contrib
This commit is contained in:
parent 250b5003ee
commit 27da0bb829
@@ -1553,3 +1553,9 @@
  year = {2014},
  url = {http://www.marcozuliani.com/docs/RANSAC4Dummies.pdf}
}
@article{Lukezic_IJCV2018,
  author = {Luke{\v{z}}i{\v{c}}, Alan and Voj{\'i}{\v{r}}, Tom{\'a}{\v{s}} and {\v{C}}ehovin Zajc, Luka and Matas, Ji{\v{r}}{\'i} and Kristan, Matej},
  title = {Discriminative Correlation Filter Tracker with Channel and Spatial Reliability},
  journal = {International Journal of Computer Vision},
  year = {2018},
}
@@ -978,8 +978,69 @@ public:
    // bool update(InputArray image, CV_OUT Rect& boundingBox) CV_OVERRIDE;
};


/** @brief the CSRT tracker

The implementation is based on @cite Lukezic_IJCV2018 Discriminative Correlation Filter with Channel and Spatial Reliability
*/
class CV_EXPORTS_W TrackerCSRT : public Tracker
{
protected:
    TrackerCSRT();  // use ::create()
public:
    virtual ~TrackerCSRT() CV_OVERRIDE;

    struct CV_EXPORTS_W_SIMPLE Params
    {
        CV_WRAP Params();

        CV_PROP_RW bool use_hog;
        CV_PROP_RW bool use_color_names;
        CV_PROP_RW bool use_gray;
        CV_PROP_RW bool use_rgb;
        CV_PROP_RW bool use_channel_weights;
        CV_PROP_RW bool use_segmentation;

        CV_PROP_RW std::string window_function;  //!< Window function: "hann", "cheb", "kaiser"
        CV_PROP_RW float kaiser_alpha;
        CV_PROP_RW float cheb_attenuation;

        CV_PROP_RW float template_size;
        CV_PROP_RW float gsl_sigma;
        CV_PROP_RW float hog_orientations;
        CV_PROP_RW float hog_clip;
        CV_PROP_RW float padding;
        CV_PROP_RW float filter_lr;
        CV_PROP_RW float weights_lr;
        CV_PROP_RW int num_hog_channels_used;
        CV_PROP_RW int admm_iterations;
        CV_PROP_RW int histogram_bins;
        CV_PROP_RW float histogram_lr;
        CV_PROP_RW int background_ratio;
        CV_PROP_RW int number_of_scales;
        CV_PROP_RW float scale_sigma_factor;
        CV_PROP_RW float scale_model_max_area;
        CV_PROP_RW float scale_lr;
        CV_PROP_RW float scale_step;

        CV_PROP_RW float psr_threshold;  //!< the target is reported as lost if the PSR drops below this value
    };

    /** @brief Create CSRT tracker instance
    @param parameters CSRT parameters TrackerCSRT::Params
    */
    static CV_WRAP Ptr<TrackerCSRT> create(const TrackerCSRT::Params &parameters = TrackerCSRT::Params());

    CV_WRAP virtual void setInitialMask(InputArray mask) = 0;

    //! Return estimated tracking confidence
    CV_WRAP virtual float getTrackingScore() const = 0;
};


//! @} video_track

} // cv


#endif
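A minimal usage sketch of the API declared above (added for illustration, not part of the patch; the include path and the input video are assumptions, and init()/update() come from the cv::Tracker base class):

#include <iostream>
#include <opencv2/core.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/video/tracking.hpp>   // assumed location of TrackerCSRT after the move
#include <opencv2/videoio.hpp>

int main()
{
    cv::VideoCapture cap("input.avi");                 // hypothetical input video
    cv::Mat frame;
    if (!cap.read(frame))
        return 1;

    cv::Rect roi = cv::selectROI("init", frame);       // pick the initial bounding box
    cv::Ptr<cv::TrackerCSRT> tracker = cv::TrackerCSRT::create();
    tracker->init(frame, roi);

    while (cap.read(frame))
    {
        if (tracker->update(frame, roi))               // returns false once the target is lost
        {
            cv::rectangle(frame, roi, cv::Scalar(0, 255, 0), 2);
            std::cout << "score: " << tracker->getTrackingScore() << std::endl;
        }
        cv::imshow("CSRT", frame);
        if (cv::waitKey(1) == 27)
            break;
    }
    return 0;
}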
@@ -1,5 +1,6 @@
#ifdef HAVE_OPENCV_VIDEO
typedef TrackerMIL::Params TrackerMIL_Params;
typedef TrackerCSRT::Params TrackerCSRT_Params;
typedef TrackerGOTURN::Params TrackerGOTURN_Params;
typedef TrackerDaSiamRPN::Params TrackerDaSiamRPN_Params;
typedef TrackerNano::Params TrackerNano_Params;

654  modules/video/src/tracking/tracker_csrt.cpp  Normal file
@@ -0,0 +1,654 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.

#include "../precomp.hpp"

#include "opencv2/video/detail/tracking.detail.hpp"

#include "tracker_csrt_segmentation.hpp"
#include "tracker_csrt_utils.hpp"
#include "tracker_csrt_scale_estimation.hpp"

namespace cv {
namespace detail {
inline namespace tracking {

/**
* \brief Implementation of TrackerModel for CSRT algorithm
*/
class TrackerCSRTModel CV_FINAL : public TrackerModel
{
public:
    TrackerCSRTModel(){}
    ~TrackerCSRTModel(){}
protected:
    void modelEstimationImpl(const std::vector<Mat>& /*responses*/) CV_OVERRIDE {}
    void modelUpdateImpl() CV_OVERRIDE {}
};

class TrackerCSRTImpl CV_FINAL : public TrackerCSRT
{
public:
    TrackerCSRTImpl(const Params &parameters = Params());

    Params params;

    Ptr<TrackerCSRTModel> model;

    double last_score;

    // Tracker API
    virtual void init(InputArray image, const Rect& boundingBox) CV_OVERRIDE;
    virtual bool update(InputArray image, Rect& boundingBox) CV_OVERRIDE;
    virtual void setInitialMask(InputArray mask) CV_OVERRIDE;
    virtual float getTrackingScore() const CV_OVERRIDE;

protected:
    void update_csr_filter(const Mat &image, const Mat &my_mask);
    void update_histograms(const Mat &image, const Rect &region);
    void extract_histograms(const Mat &image, cv::Rect region, Histogram &hf, Histogram &hb);
    std::vector<Mat> create_csr_filter(const std::vector<cv::Mat>
            img_features, const cv::Mat Y, const cv::Mat P);
    Mat calculate_response(const Mat &image, const std::vector<Mat> filter);
    Mat get_location_prior(const Rect roi, const Size2f target_size, const Size img_sz);
    Mat segment_region(const Mat &image, const Size &target_size, float scale_factor);
    Point2f estimate_new_position(const Mat &image);
    std::vector<Mat> get_features(const Mat &patch, const Size2i &feature_size);

    bool check_mask_area(const Mat &mat, const double obj_area);
    float current_scale_factor;
    Mat window;
    Mat yf;
    Rect2f bounding_box;
    std::vector<Mat> csr_filter;
    std::vector<float> filter_weights;
    Size2f original_target_size;
    Size2i image_size;
    Size2f template_size;
    Size2i rescaled_template_size;
    float rescale_ratio;
    Point2f object_center;
    DSST dsst;
    Histogram hist_foreground;
    Histogram hist_background;
    double p_b;
    Mat erode_element;
    Mat filter_mask;
    Mat preset_mask;
    Mat default_mask;
    float default_mask_area;
    int cell_size;
};

TrackerCSRTImpl::TrackerCSRTImpl(const TrackerCSRT::Params &parameters) :
    params(parameters)
{
    // nothing
}

void TrackerCSRTImpl::setInitialMask(InputArray mask)
{
    preset_mask = mask.getMat();
}

bool TrackerCSRTImpl::check_mask_area(const Mat &mat, const double obj_area)
{
    double threshold = 0.05;
    double mask_area = sum(mat)[0];
    if(mask_area < threshold*obj_area) {
        return false;
    }
    return true;
}

Mat TrackerCSRTImpl::calculate_response(const Mat &image, const std::vector<Mat> filter)
{
    Mat patch = get_subwindow(image, object_center, cvFloor(current_scale_factor * template_size.width),
        cvFloor(current_scale_factor * template_size.height));
    resize(patch, patch, rescaled_template_size, 0, 0, INTER_CUBIC);

    std::vector<Mat> ftrs = get_features(patch, yf.size());
    std::vector<Mat> Ffeatures = fourier_transform_features(ftrs);
    Mat resp, res;
    if(params.use_channel_weights){
        res = Mat::zeros(Ffeatures[0].size(), CV_32FC2);
        Mat resp_ch;
        Mat mul_mat;
        for(size_t i = 0; i < Ffeatures.size(); ++i) {
            mulSpectrums(Ffeatures[i], filter[i], resp_ch, 0, true);
            res += (resp_ch * filter_weights[i]);
        }
        idft(res, res, DFT_SCALE | DFT_REAL_OUTPUT);
    } else {
        res = Mat::zeros(Ffeatures[0].size(), CV_32FC2);
        Mat resp_ch;
        for(size_t i = 0; i < Ffeatures.size(); ++i) {
            mulSpectrums(Ffeatures[i], filter[i], resp_ch, 0, true);
            res = res + resp_ch;
        }
        idft(res, res, DFT_SCALE | DFT_REAL_OUTPUT);
    }
    return res;
}

void TrackerCSRTImpl::update_csr_filter(const Mat &image, const Mat &mask)
{
    Mat patch = get_subwindow(image, object_center, cvFloor(current_scale_factor * template_size.width),
        cvFloor(current_scale_factor * template_size.height));
    resize(patch, patch, rescaled_template_size, 0, 0, INTER_CUBIC);

    std::vector<Mat> ftrs = get_features(patch, yf.size());
    std::vector<Mat> Fftrs = fourier_transform_features(ftrs);
    std::vector<Mat> new_csr_filter = create_csr_filter(Fftrs, yf, mask);
    //calculate per channel weights
    if(params.use_channel_weights) {
        Mat current_resp;
        double max_val;
        float sum_weights = 0;
        std::vector<float> new_filter_weights = std::vector<float>(new_csr_filter.size());
        for(size_t i = 0; i < new_csr_filter.size(); ++i) {
            mulSpectrums(Fftrs[i], new_csr_filter[i], current_resp, 0, true);
            idft(current_resp, current_resp, DFT_SCALE | DFT_REAL_OUTPUT);
            minMaxLoc(current_resp, NULL, &max_val, NULL, NULL);
            sum_weights += static_cast<float>(max_val);
            new_filter_weights[i] = static_cast<float>(max_val);
        }
        //update filter weights with new values
        float updated_sum = 0;
        for(size_t i = 0; i < filter_weights.size(); ++i) {
            filter_weights[i] = filter_weights[i]*(1.0f - params.weights_lr) +
                params.weights_lr * (new_filter_weights[i] / sum_weights);
            updated_sum += filter_weights[i];
        }
        //normalize weights
        for(size_t i = 0; i < filter_weights.size(); ++i) {
            filter_weights[i] /= updated_sum;
        }
    }
    for(size_t i = 0; i < csr_filter.size(); ++i) {
        csr_filter[i] = (1.0f - params.filter_lr)*csr_filter[i] + params.filter_lr * new_csr_filter[i];
    }
    std::vector<Mat>().swap(ftrs);
    std::vector<Mat>().swap(Fftrs);
}


std::vector<Mat> TrackerCSRTImpl::get_features(const Mat &patch, const Size2i &feature_size)
{
    std::vector<Mat> features;
    if (params.use_hog) {
        std::vector<Mat> hog = get_features_hog(patch, cell_size);
        features.insert(features.end(), hog.begin(),
                hog.begin()+params.num_hog_channels_used);
    }
    // TODO: restore color_names feature mode
    // if (params.use_color_names) {
    //     std::vector<Mat> cn;
    //     cn = get_features_cn(patch, feature_size);
    //     features.insert(features.end(), cn.begin(), cn.end());
    // }
    if(params.use_gray) {
        Mat gray_m;
        cvtColor(patch, gray_m, COLOR_BGR2GRAY);
        resize(gray_m, gray_m, feature_size, 0, 0, INTER_CUBIC);
        gray_m.convertTo(gray_m, CV_32FC1, 1.0/255.0, -0.5);
        features.push_back(gray_m);
    }
    if(params.use_rgb) {
        std::vector<Mat> rgb_features = get_features_rgb(patch, feature_size);
        features.insert(features.end(), rgb_features.begin(), rgb_features.end());
    }

    for (size_t i = 0; i < features.size(); ++i) {
        features.at(i) = features.at(i).mul(window);
    }
    return features;
}

class ParallelCreateCSRFilter : public ParallelLoopBody {
public:
    ParallelCreateCSRFilter(
        const std::vector<cv::Mat> img_features_,
        const cv::Mat Y_,
        const cv::Mat P_,
        int admm_iterations_,
        std::vector<Mat> &result_filter_):
        result_filter(result_filter_)
    {
        this->img_features = img_features_;
        this->Y = Y_;
        this->P = P_;
        this->admm_iterations = admm_iterations_;
    }
    virtual void operator ()(const Range& range) const CV_OVERRIDE
    {
        for (int i = range.start; i < range.end; i++) {
            float mu = 5.0f;
            float beta = 3.0f;
            float mu_max = 20.0f;
            float lambda = mu / 100.0f;

            Mat F = img_features[i];

            Mat Sxy, Sxx;
            mulSpectrums(F, Y, Sxy, 0, true);
            mulSpectrums(F, F, Sxx, 0, true);

            Mat H;
            H = divide_complex_matrices(Sxy, (Sxx + lambda));
            idft(H, H, DFT_SCALE|DFT_REAL_OUTPUT);
            H = H.mul(P);
            dft(H, H, DFT_COMPLEX_OUTPUT);
            Mat L = Mat::zeros(H.size(), H.type()); //Lagrangian multiplier
            Mat G;
            for(int iteration = 0; iteration < admm_iterations; ++iteration) {
                G = divide_complex_matrices((Sxy + (mu * H) - L), (Sxx + mu));
                idft((mu * G) + L, H, DFT_SCALE | DFT_REAL_OUTPUT);
                float lm = 1.0f / (lambda+mu);
                H = H.mul(P*lm);
                dft(H, H, DFT_COMPLEX_OUTPUT);

                //Update variables for next iteration
                L = L + mu * (G - H);
                mu = min(mu_max, beta*mu);
            }
            result_filter[i] = H;
        }
    }

    ParallelCreateCSRFilter& operator=(const ParallelCreateCSRFilter &) {
        return *this;
    }

private:
    int admm_iterations;
    Mat Y;
    Mat P;
    std::vector<Mat> img_features;
    std::vector<Mat> &result_filter;
};


std::vector<Mat> TrackerCSRTImpl::create_csr_filter(
        const std::vector<cv::Mat> img_features,
        const cv::Mat Y,
        const cv::Mat P)
{
    std::vector<Mat> result_filter;
    result_filter.resize(img_features.size());
    ParallelCreateCSRFilter parallelCreateCSRFilter(img_features, Y, P,
            params.admm_iterations, result_filter);
    parallel_for_(Range(0, static_cast<int>(result_filter.size())), parallelCreateCSRFilter);

    return result_filter;
}

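// ---------------------------------------------------------------------------
// Note (summary added for readability, not part of the original sources):
// ParallelCreateCSRFilter above learns one constrained correlation filter per
// feature channel with ADMM, following @cite Lukezic_IJCV2018.  With
// Sxy = F .* conj(Y), Sxx = F .* conj(F) and P the spatial reliability mask,
// each iteration performs
//     G  = (Sxy + mu*H - L) / (Sxx + mu)                // unconstrained filter step
//     H  = DFT( IDFT(mu*G + L) .* P / (lambda + mu) )   // re-impose the spatial mask
//     L  = L + mu*(G - H)                               // Lagrange multiplier update
//     mu = min(mu_max, beta*mu)                         // increase the penalty
// create_csr_filter() simply runs this loop in parallel over the channels.
// ---------------------------------------------------------------------------
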
Mat TrackerCSRTImpl::get_location_prior(
        const Rect roi,
        const Size2f target_size,
        const Size img_sz)
{
    int x1 = cvRound(max(min(roi.x-1, img_sz.width-1), 0));
    int y1 = cvRound(max(min(roi.y-1, img_sz.height-1), 0));

    int x2 = cvRound(min(max(roi.width-1, 0), img_sz.width-1));
    int y2 = cvRound(min(max(roi.height-1, 0), img_sz.height-1));

    Size target_sz;
    target_sz.width = target_sz.height = cvFloor(min(target_size.width, target_size.height));

    double cx = x1 + (x2-x1)/2.;
    double cy = y1 + (y2-y1)/2.;
    double kernel_size_width = 1.0/(0.5*static_cast<double>(target_sz.width)*1.4142+1);
    double kernel_size_height = 1.0/(0.5*static_cast<double>(target_sz.height)*1.4142+1);

    cv::Mat kernel_weight = Mat::zeros(1 + cvFloor(y2 - y1), 1 + cvFloor(-(x1-cx) + (x2-cx)), CV_64FC1);
    for (int y = y1; y < y2+1; ++y){
        double * weightPtr = kernel_weight.ptr<double>(y);
        double tmp_y = std::pow((cy-y)*kernel_size_height, 2);
        for (int x = x1; x < x2+1; ++x){
            weightPtr[x] = kernel_epan(std::pow((cx-x)*kernel_size_width,2) + tmp_y);
        }
    }

    double max_val;
    cv::minMaxLoc(kernel_weight, NULL, &max_val, NULL, NULL);
    Mat fg_prior = kernel_weight / max_val;
    fg_prior.setTo(0.5, fg_prior < 0.5);
    fg_prior.setTo(0.9, fg_prior > 0.9);
    return fg_prior;
}

Mat TrackerCSRTImpl::segment_region(const Mat &image, const Size &target_size, float scale_factor)
{
    Rect valid_pixels;
    Mat patch = get_subwindow(image, object_center, cvFloor(scale_factor * template_size.width),
        cvFloor(scale_factor * template_size.height), &valid_pixels);
    Size2f scaled_target = Size2f(target_size.width * scale_factor,
            target_size.height * scale_factor);
    Mat fg_prior = get_location_prior(
            Rect(0,0, patch.size().width, patch.size().height),
            scaled_target, patch.size());

    std::vector<Mat> img_channels;
    split(patch, img_channels);
    std::pair<Mat, Mat> probs = Segment::computePosteriors2(img_channels, 0, 0, patch.cols, patch.rows,
            p_b, fg_prior, 1.0-fg_prior, hist_foreground, hist_background);

    Mat mask = Mat::zeros(probs.first.size(), probs.first.type());
    probs.first(valid_pixels).copyTo(mask(valid_pixels));
    double max_resp = get_max(mask);
    threshold(mask, mask, max_resp / 2.0, 1, THRESH_BINARY);
    mask.convertTo(mask, CV_32FC1, 1.0);
    return mask;
}


void TrackerCSRTImpl::extract_histograms(const Mat &image, cv::Rect region, Histogram &hf, Histogram &hb)
{
    // get coordinates of the region
    int x1 = std::min(std::max(0, region.x), image.cols-1);
    int y1 = std::min(std::max(0, region.y), image.rows-1);
    int x2 = std::min(std::max(0, region.x + region.width), image.cols-1);
    int y2 = std::min(std::max(0, region.y + region.height), image.rows-1);

    // calculate coordinates of the background region
    int offsetX = (x2-x1+1) / params.background_ratio;
    int offsetY = (y2-y1+1) / params.background_ratio;
    int outer_y1 = std::max(0, (int)(y1-offsetY));
    int outer_y2 = std::min(image.rows, (int)(y2+offsetY+1));
    int outer_x1 = std::max(0, (int)(x1-offsetX));
    int outer_x2 = std::min(image.cols, (int)(x2+offsetX+1));

    // calculate probability for the background
    p_b = 1.0 - ((x2-x1+1) * (y2-y1+1)) /
        ((double) (outer_x2-outer_x1+1) * (outer_y2-outer_y1+1));

    // split multi-channel image into the std::vector of matrices
    std::vector<Mat> img_channels(image.channels());
    split(image, img_channels);
    for(size_t k=0; k<img_channels.size(); k++) {
        img_channels.at(k).convertTo(img_channels.at(k), CV_8UC1);
    }

    hf.extractForegroundHistogram(img_channels, Mat(), false, x1, y1, x2, y2);
    hb.extractBackGroundHistogram(img_channels, x1, y1, x2, y2,
            outer_x1, outer_y1, outer_x2, outer_y2);
    std::vector<Mat>().swap(img_channels);
}
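// ---------------------------------------------------------------------------
// Note (worked number added for readability, not part of the original sources):
// with the default background_ratio = 2 the background region above extends the
// target box by half of its width/height on every side, so away from the image
// border
//     p_b = 1 - (w*h) / (2w * 2h) = 0.75
// i.e. roughly three quarters of the sampled area is treated as background.
// ---------------------------------------------------------------------------
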
void TrackerCSRTImpl::update_histograms(const Mat &image, const Rect &region)
{
    // create temporary histograms
    Histogram hf(image.channels(), params.histogram_bins);
    Histogram hb(image.channels(), params.histogram_bins);
    extract_histograms(image, region, hf, hb);

    // get histogram vectors from temporary histograms
    std::vector<double> hf_vect_new = hf.getHistogramVector();
    std::vector<double> hb_vect_new = hb.getHistogramVector();
    // get histogram vectors from learned histograms
    std::vector<double> hf_vect = hist_foreground.getHistogramVector();
    std::vector<double> hb_vect = hist_background.getHistogramVector();

    // update histograms - use learning rate
    for(size_t i=0; i<hf_vect.size(); i++) {
        hf_vect_new[i] = (1-params.histogram_lr)*hf_vect[i] +
            params.histogram_lr*hf_vect_new[i];
        hb_vect_new[i] = (1-params.histogram_lr)*hb_vect[i] +
            params.histogram_lr*hb_vect_new[i];
    }

    // set learned histograms
    hist_foreground.setHistogramVector(&hf_vect_new[0]);
    hist_background.setHistogramVector(&hb_vect_new[0]);

    std::vector<double>().swap(hf_vect);
    std::vector<double>().swap(hb_vect);
}

Point2f TrackerCSRTImpl::estimate_new_position(const Mat &image)
{

    Mat resp = calculate_response(image, csr_filter);

    Point max_loc;
    minMaxLoc(resp, NULL, &last_score, NULL, &max_loc);
    if (last_score < params.psr_threshold)
        return Point2f(-1,-1); // target "lost"

    // take into account also subpixel accuracy
    float col = ((float) max_loc.x) + subpixel_peak(resp, "horizontal", max_loc);
    float row = ((float) max_loc.y) + subpixel_peak(resp, "vertical", max_loc);
    if(row + 1 > (float)resp.rows / 2.0f) {
        row = row - resp.rows;
    }
    if(col + 1 > (float)resp.cols / 2.0f) {
        col = col - resp.cols;
    }
    // calculate x and y displacements
    Point2f new_center = object_center + Point2f(current_scale_factor * (1.0f / rescale_ratio) *cell_size*(col),
            current_scale_factor * (1.0f / rescale_ratio) *cell_size*(row));
    //sanity checks
    if(new_center.x < 0)
        new_center.x = 0;
    if(new_center.x >= image_size.width)
        new_center.x = static_cast<float>(image_size.width - 1);
    if(new_center.y < 0)
        new_center.y = 0;
    if(new_center.y >= image_size.height)
        new_center.y = static_cast<float>(image_size.height - 1);

    return new_center;
}

// *********************************************************************
// *                       Update API function                         *
// *********************************************************************
bool TrackerCSRTImpl::update(InputArray image_, Rect& boundingBox)
{
    Mat image;
    if(image_.channels() == 1)    //treat gray image as color image
        cvtColor(image_, image, COLOR_GRAY2BGR);
    else
        image = image_.getMat();

    object_center = estimate_new_position(image);
    if (object_center.x < 0 && object_center.y < 0)
        return false;

    current_scale_factor = dsst.getScale(image, object_center);
    //update bounding_box according to new scale and location
    bounding_box.x = object_center.x - current_scale_factor * original_target_size.width / 2.0f;
    bounding_box.y = object_center.y - current_scale_factor * original_target_size.height / 2.0f;
    bounding_box.width = current_scale_factor * original_target_size.width;
    bounding_box.height = current_scale_factor * original_target_size.height;

    //update tracker
    if(params.use_segmentation) {
        Mat hsv_img = bgr2hsv(image);
        update_histograms(hsv_img, bounding_box);
        filter_mask = segment_region(hsv_img, original_target_size, current_scale_factor);
        resize(filter_mask, filter_mask, yf.size(), 0, 0, INTER_NEAREST);
        if(check_mask_area(filter_mask, default_mask_area)) {
            dilate(filter_mask, filter_mask, erode_element);
        } else {
            filter_mask = default_mask;
        }
    } else {
        filter_mask = default_mask;
    }
    update_csr_filter(image, filter_mask);
    dsst.update(image, object_center);
    boundingBox = bounding_box;
    return true;
}

float TrackerCSRTImpl::getTrackingScore() const
{
    return static_cast<float>(last_score);
}

// *********************************************************************
// *                        Init API function                          *
// *********************************************************************
void TrackerCSRTImpl::init(InputArray image_, const Rect& boundingBox)
{
    Mat image;
    if(image_.channels() == 1)    //treat gray image as color image
        cvtColor(image_, image, COLOR_GRAY2BGR);
    else
        image = image_.getMat();

    current_scale_factor = 1.0;
    image_size = image.size();
    bounding_box = boundingBox;
    cell_size = cvFloor(std::min(4.0, std::max(1.0, static_cast<double>(
        cvCeil((bounding_box.width * bounding_box.height)/400.0)))));
    original_target_size = Size(bounding_box.size());

    template_size.width = static_cast<float>(cvFloor(original_target_size.width + params.padding *
            sqrt(original_target_size.width * original_target_size.height)));
    template_size.height = static_cast<float>(cvFloor(original_target_size.height + params.padding *
            sqrt(original_target_size.width * original_target_size.height)));
    template_size.width = template_size.height =
        (template_size.width + template_size.height) / 2.0f;
    rescale_ratio = sqrt((params.template_size * params.template_size) / (template_size.width * template_size.height));
    if(rescale_ratio > 1) {
        rescale_ratio = 1;
    }
    rescaled_template_size = Size2i(cvFloor(template_size.width * rescale_ratio),
            cvFloor(template_size.height * rescale_ratio));
    object_center = Point2f(static_cast<float>(boundingBox.x) + original_target_size.width / 2.0f,
            static_cast<float>(boundingBox.y) + original_target_size.height / 2.0f);

    yf = gaussian_shaped_labels(params.gsl_sigma,
            rescaled_template_size.width / cell_size, rescaled_template_size.height / cell_size);
    if(params.window_function.compare("hann") == 0) {
        window = get_hann_win(Size(yf.cols,yf.rows));
    } else if(params.window_function.compare("cheb") == 0) {
        window = get_chebyshev_win(Size(yf.cols,yf.rows), params.cheb_attenuation);
    } else if(params.window_function.compare("kaiser") == 0) {
        window = get_kaiser_win(Size(yf.cols,yf.rows), params.kaiser_alpha);
    } else {
        CV_Error(Error::StsBadArg, "Not a valid window function");
    }

    Size2i scaled_obj_size = Size2i(cvFloor(original_target_size.width * rescale_ratio / cell_size),
            cvFloor(original_target_size.height * rescale_ratio / cell_size));
    //set dummy mask and area
    int x0 = std::max((yf.size().width - scaled_obj_size.width)/2 - 1, 0);
    int y0 = std::max((yf.size().height - scaled_obj_size.height)/2 - 1, 0);
    default_mask = Mat::zeros(yf.size(), CV_32FC1);
    default_mask(Rect(x0,y0,scaled_obj_size.width, scaled_obj_size.height)) = 1.0f;
    default_mask_area = static_cast<float>(sum(default_mask)[0]);

    //initialize segmentation
    if(params.use_segmentation) {
        Mat hsv_img = bgr2hsv(image);
        hist_foreground = Histogram(hsv_img.channels(), params.histogram_bins);
        hist_background = Histogram(hsv_img.channels(), params.histogram_bins);
        extract_histograms(hsv_img, bounding_box, hist_foreground, hist_background);
        filter_mask = segment_region(hsv_img, original_target_size, current_scale_factor);
        //update calculated mask with preset mask
        if(preset_mask.data){
            Mat preset_mask_padded = Mat::zeros(filter_mask.size(), filter_mask.type());
            int sx = std::max((int)cvFloor(preset_mask_padded.cols / 2.0f - preset_mask.cols / 2.0f) - 1, 0);
            int sy = std::max((int)cvFloor(preset_mask_padded.rows / 2.0f - preset_mask.rows / 2.0f) - 1, 0);
            preset_mask.copyTo(preset_mask_padded(
                Rect(sx, sy, preset_mask.cols, preset_mask.rows)));
            filter_mask = filter_mask.mul(preset_mask_padded);
        }
        erode_element = getStructuringElement(MORPH_ELLIPSE, Size(3,3), Point(1,1));
        resize(filter_mask, filter_mask, yf.size(), 0, 0, INTER_NEAREST);
        if(check_mask_area(filter_mask, default_mask_area)) {
            dilate(filter_mask, filter_mask, erode_element);
        } else {
            filter_mask = default_mask;
        }

    } else {
        filter_mask = default_mask;
    }

    //initialize filter
    Mat patch = get_subwindow(image, object_center, cvFloor(current_scale_factor * template_size.width),
        cvFloor(current_scale_factor * template_size.height));
    resize(patch, patch, rescaled_template_size, 0, 0, INTER_CUBIC);
    std::vector<Mat> patch_ftrs = get_features(patch, yf.size());
    std::vector<Mat> Fftrs = fourier_transform_features(patch_ftrs);
    csr_filter = create_csr_filter(Fftrs, yf, filter_mask);

    if(params.use_channel_weights) {
        Mat current_resp;
        filter_weights = std::vector<float>(csr_filter.size());
        float chw_sum = 0;
        for (size_t i = 0; i < csr_filter.size(); ++i) {
            mulSpectrums(Fftrs[i], csr_filter[i], current_resp, 0, true);
            idft(current_resp, current_resp, DFT_SCALE | DFT_REAL_OUTPUT);
            double max_val;
            minMaxLoc(current_resp, NULL, &max_val, NULL, NULL);
            chw_sum += static_cast<float>(max_val);
            filter_weights[i] = static_cast<float>(max_val);
        }
        for (size_t i = 0; i < filter_weights.size(); ++i) {
            filter_weights[i] /= chw_sum;
        }
    }

    //initialize scale search
    dsst = DSST(image, bounding_box, template_size, params.number_of_scales, params.scale_step,
            params.scale_model_max_area, params.scale_sigma_factor, params.scale_lr);

    model = makePtr<TrackerCSRTModel>();
}

}}}  // cv::detail::tracking

//==============================================================================

namespace cv {

TrackerCSRT::Params::Params()
{
    use_channel_weights = true;
    use_segmentation = true;
    use_hog = true;
    use_color_names = true;
    use_gray = true;
    use_rgb = false;
    window_function = "hann";
    kaiser_alpha = 3.75f;
    cheb_attenuation = 45;
    padding = 3.0f;
    template_size = 200;
    gsl_sigma = 1.0f;
    hog_orientations = 9;
    hog_clip = 0.2f;
    num_hog_channels_used = 18;
    filter_lr = 0.02f;
    weights_lr = 0.02f;
    admm_iterations = 4;
    number_of_scales = 33;
    scale_sigma_factor = 0.250f;
    scale_model_max_area = 512.0f;
    scale_lr = 0.025f;
    scale_step = 1.020f;
    histogram_bins = 16;
    background_ratio = 2;
    histogram_lr = 0.04f;
    psr_threshold = 0.035f;
}


TrackerCSRT::TrackerCSRT() { }

TrackerCSRT::~TrackerCSRT() { }

Ptr<TrackerCSRT> TrackerCSRT::create(const TrackerCSRT::Params &parameters)
{
    return makePtr<cv::detail::tracking::TrackerCSRTImpl>(parameters);
}

} // cv::
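The defaults above can be overridden before the tracker is created; a small sketch (added for illustration, not part of the patch; the chosen values are arbitrary):

cv::TrackerCSRT::Params p;           // starts from the defaults set in Params::Params()
p.window_function = "kaiser";        // accepted values: "hann", "cheb", "kaiser"
p.kaiser_alpha    = 3.75f;
p.use_segmentation = false;          // skip the color-segmentation mask update
p.psr_threshold    = 0.05f;          // declare the target lost earlier than the default 0.035
cv::Ptr<cv::TrackerCSRT> tracker = cv::TrackerCSRT::create(p);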

203  modules/video/src/tracking/tracker_csrt_scale_estimation.cpp  Normal file
@@ -0,0 +1,203 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.

#include "../precomp.hpp"

#include "tracker_csrt_scale_estimation.hpp"
#include "tracker_csrt_utils.hpp"

//Discriminative Scale Space Tracking
namespace cv
{

class ParallelGetScaleFeatures : public ParallelLoopBody
{
public:
    ParallelGetScaleFeatures(
        Mat img_,
        Point2f pos_,
        Size2f base_target_sz_,
        float current_scale_,
        std::vector<float> &scale_factors_,
        Mat scale_window_,
        Size scale_model_sz_,
        int col_len_,
        Mat &result_)
    {
        this->img = img_;
        this->pos = pos_;
        this->base_target_sz = base_target_sz_;
        this->current_scale = current_scale_;
        this->scale_factors = scale_factors_;
        this->scale_window = scale_window_;
        this->scale_model_sz = scale_model_sz_;
        this->col_len = col_len_;
        this->result = result_;
    }
    virtual void operator ()(const Range& range) const CV_OVERRIDE
    {
        for (int s = range.start; s < range.end; s++) {
            Size patch_sz = Size(static_cast<int>(current_scale * scale_factors[s] * base_target_sz.width),
                    static_cast<int>(current_scale * scale_factors[s] * base_target_sz.height));
            Mat img_patch = get_subwindow(img, pos, patch_sz.width, patch_sz.height);
            img_patch.convertTo(img_patch, CV_32FC3);
            resize(img_patch, img_patch, Size(scale_model_sz.width, scale_model_sz.height), 0, 0, INTER_LINEAR);
            std::vector<Mat> hog;
            hog = get_features_hog(img_patch, 4);
            for (int i = 0; i < static_cast<int>(hog.size()); ++i) {
                hog[i] = hog[i].t();
                hog[i] = scale_window.at<float>(0,s) * hog[i].reshape(0, col_len);
                hog[i].copyTo(result(Rect(Point(s, i*col_len), hog[i].size())));
            }
        }
    }

    ParallelGetScaleFeatures& operator=(const ParallelGetScaleFeatures &) {
        return *this;
    }

private:
    Mat img;
    Point2f pos;
    Size2f base_target_sz;
    float current_scale;
    std::vector<float> scale_factors;
    Mat scale_window;
    Size scale_model_sz;
    int col_len;
    Mat result;
};


DSST::DSST(const Mat &image,
        Rect2f bounding_box,
        Size2f template_size,
        int numberOfScales,
        float scaleStep,
        float maxModelArea,
        float sigmaFactor,
        float scaleLearnRate):
    scales_count(numberOfScales), scale_step(scaleStep), max_model_area(maxModelArea),
    sigma_factor(sigmaFactor), learn_rate(scaleLearnRate)
{
    original_targ_sz = bounding_box.size();
    Point2f object_center = Point2f(
        bounding_box.x + static_cast<float>(original_targ_sz.width) / 2.f,
        bounding_box.y + static_cast<float>(original_targ_sz.height) / 2.f
    );

    current_scale_factor = 1.0;
    if(scales_count % 2 == 0)
        scales_count++;

    scale_sigma = static_cast<float>(sqrt(scales_count) * sigma_factor);

    min_scale_factor = static_cast<float>(pow(scale_step,
            cvCeil(log(max(5.0 / template_size.width, 5.0 / template_size.height)) / log(scale_step))));
    max_scale_factor = static_cast<float>(pow(scale_step,
            cvFloor(log(min((float)image.rows / (float)bounding_box.width,
            (float)image.cols / (float)bounding_box.height)) / log(scale_step))));
    ys = Mat(1, scales_count, CV_32FC1);
    float ss, sf;
    for(int i = 0; i < ys.cols; ++i) {
        ss = (float)(i+1) - cvCeil((float)scales_count / 2.0f);
        ys.at<float>(0,i) = static_cast<float>(exp(-0.5 * pow(ss,2) / pow(scale_sigma,2)));
        sf = static_cast<float>(i + 1);
        scale_factors.push_back(pow(scale_step, cvCeil((float)scales_count / 2.0f) - sf));
    }

    scale_window = get_hann_win(Size(scales_count, 1));

    float scale_model_factor = 1.0;
    if(template_size.width * template_size.height * pow(scale_model_factor, 2) > max_model_area)
    {
        scale_model_factor = sqrt(max_model_area /
                (template_size.width * template_size.height));
    }
    scale_model_sz = Size(cvFloor(template_size.width * scale_model_factor),
            cvFloor(template_size.height * scale_model_factor));

    Mat scale_resp = get_scale_features(image, object_center, original_targ_sz, current_scale_factor);

    Mat ysf_row = Mat(ys.size(), CV_32FC2);
    dft(ys, ysf_row, DFT_ROWS | DFT_COMPLEX_OUTPUT, 0);
    ysf = repeat(ysf_row, scale_resp.rows, 1);
    Mat Fscale_resp;
    dft(scale_resp, Fscale_resp, DFT_ROWS | DFT_COMPLEX_OUTPUT);
    mulSpectrums(ysf, Fscale_resp, sf_num, 0, true);
    Mat sf_den_all;
    mulSpectrums(Fscale_resp, Fscale_resp, sf_den_all, 0, true);
    reduce(sf_den_all, sf_den, 0, REDUCE_SUM, -1);
}

DSST::~DSST()
{
}

Mat DSST::get_scale_features(Mat img, Point2f pos, Size2f base_target_sz, float current_scale)
{
    Mat result;
    int col_len = 0;
    Size patch_sz = Size(cvFloor(current_scale * scale_factors[0] * base_target_sz.width),
            cvFloor(current_scale * scale_factors[0] * base_target_sz.height));
    Mat img_patch = get_subwindow(img, pos, patch_sz.width, patch_sz.height);
    img_patch.convertTo(img_patch, CV_32FC3);
    resize(img_patch, img_patch, Size(scale_model_sz.width, scale_model_sz.height), 0, 0, INTER_LINEAR);
    std::vector<Mat> hog;
    hog = get_features_hog(img_patch, 4);
    result = Mat(Size((int)scale_factors.size(), hog[0].cols * hog[0].rows * (int)hog.size()), CV_32F);
    col_len = hog[0].cols * hog[0].rows;
    for (int i = 0; i < static_cast<int>(hog.size()); ++i) {
        hog[i] = hog[i].t();
        hog[i] = scale_window.at<float>(0,0) * hog[i].reshape(0, col_len);
        hog[i].copyTo(result(Rect(Point(0, i*col_len), hog[i].size())));
    }

    ParallelGetScaleFeatures parallelGetScaleFeatures(img, pos, base_target_sz,
            current_scale, scale_factors, scale_window, scale_model_sz, col_len, result);
    parallel_for_(Range(1, static_cast<int>(scale_factors.size())), parallelGetScaleFeatures);
    return result;
}

void DSST::update(const Mat &image, const Point2f object_center)
{
    Mat scale_features = get_scale_features(image, object_center, original_targ_sz, current_scale_factor);
    Mat Fscale_features;
    dft(scale_features, Fscale_features, DFT_ROWS | DFT_COMPLEX_OUTPUT);
    Mat new_sf_num;
    Mat new_sf_den;
    Mat new_sf_den_all;
    mulSpectrums(ysf, Fscale_features, new_sf_num, DFT_ROWS, true);
    Mat sf_den_all;
    mulSpectrums(Fscale_features, Fscale_features, new_sf_den_all, DFT_ROWS, true);
    reduce(new_sf_den_all, new_sf_den, 0, REDUCE_SUM, -1);

    sf_num = (1 - learn_rate) * sf_num + learn_rate * new_sf_num;
    sf_den = (1 - learn_rate) * sf_den + learn_rate * new_sf_den;
}

float DSST::getScale(const Mat &image, const Point2f object_center)
{
    Mat scale_features = get_scale_features(image, object_center, original_targ_sz, current_scale_factor);

    Mat Fscale_features;
    dft(scale_features, Fscale_features, DFT_ROWS | DFT_COMPLEX_OUTPUT);

    mulSpectrums(Fscale_features, sf_num, Fscale_features, 0, false);
    Mat scale_resp;
    reduce(Fscale_features, scale_resp, 0, REDUCE_SUM, -1);
    scale_resp = divide_complex_matrices(scale_resp, sf_den + 0.01f);
    idft(scale_resp, scale_resp, DFT_REAL_OUTPUT|DFT_SCALE);
    Point max_loc;
    minMaxLoc(scale_resp, NULL, NULL, NULL, &max_loc);

    current_scale_factor *= scale_factors[max_loc.x];
    if(current_scale_factor < min_scale_factor)
        current_scale_factor = min_scale_factor;
    else if(current_scale_factor > max_scale_factor)
        current_scale_factor = max_scale_factor;

    return current_scale_factor;
}
} /* namespace cv */
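A worked example of the scale grid built in the DSST constructor above (a standalone sketch added for illustration, not part of the patch; it reproduces the scale_factors loop with the tracker defaults number_of_scales = 33 and scale_step = 1.02):

#include <cmath>
#include <cstdio>

int main()
{
    const int   scales_count = 33;     // default number_of_scales
    const float scale_step   = 1.02f;  // default scale_step
    for (int i = 0; i < scales_count; ++i)
    {
        float sf     = static_cast<float>(i + 1);
        float factor = std::pow(scale_step, std::ceil(scales_count / 2.0f) - sf);
        std::printf("scale %2d: %.4f\n", i, factor);   // runs from ~1.373 down to ~0.728
    }
    return 0;
}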

46  modules/video/src/tracking/tracker_csrt_scale_estimation.hpp  Normal file
@@ -0,0 +1,46 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.

#ifndef OPENCV_TRACKER_CSRT_SCALE_ESTIMATION
#define OPENCV_TRACKER_CSRT_SCALE_ESTIMATION

#include "opencv2/core/mat.hpp"

namespace cv
{

class DSST {
public:
    DSST() {}
    DSST(const Mat &image, Rect2f bounding_box, Size2f template_size, int numberOfScales,
            float scaleStep, float maxModelArea, float sigmaFactor, float scaleLearnRate);
    ~DSST();
    void update(const Mat &image, const Point2f objectCenter);
    float getScale(const Mat &image, const Point2f objectCenter);
private:
    Mat get_scale_features(Mat img, Point2f pos, Size2f base_target_sz, float current_scale);

    Size scale_model_sz;
    Mat ys;
    Mat ysf;
    Mat scale_window;
    std::vector<float> scale_factors;
    Mat sf_num;
    Mat sf_den;
    float scale_sigma;
    float min_scale_factor;
    float max_scale_factor;
    float current_scale_factor;
    int scales_count;
    float scale_step;
    float max_model_area;
    float sigma_factor;
    float learn_rate;

    Size original_targ_sz;
};

} /* namespace cv */

#endif

450  modules/video/src/tracking/tracker_csrt_segmentation.cpp  Normal file
@ -0,0 +1,450 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
#include "../precomp.hpp"
|
||||
|
||||
#include "tracker_csrt_segmentation.hpp"
|
||||
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
#include <iostream>
|
||||
|
||||
//-------------------- HISTOGRAM CLASS --------------------
|
||||
namespace cv
|
||||
{
|
||||
|
||||
Histogram::Histogram(int numDimensions, int numBinsPerDimension)
|
||||
{
|
||||
m_numBinsPerDim = numBinsPerDimension;
|
||||
m_numDim = numDimensions;
|
||||
p_size = cvFloor(std::pow(m_numBinsPerDim, m_numDim));
|
||||
p_bins.resize(p_size, 0);
|
||||
p_dimIdCoef.resize(m_numDim, 1);
|
||||
for (int i = 0; i < m_numDim-1; ++i)
|
||||
p_dimIdCoef[i] = static_cast<int>(std::pow(numBinsPerDimension, m_numDim - 1 - i));
|
||||
|
||||
}
|
||||
|
||||
void Histogram::extractForegroundHistogram(std::vector<cv::Mat> & imgChannels,
|
||||
cv::Mat weights, bool useMatWeights, int x1, int y1, int x2, int y2)
|
||||
{
|
||||
//just for code clarity
|
||||
cv::Mat & img = imgChannels[0];
|
||||
|
||||
if (!useMatWeights){
|
||||
//weights are epanechnikov distr. with peek at the center of the image;
|
||||
double cx = x1 + (x2-x1)/2.;
|
||||
double cy = y1 + (y2-y1)/2.;
|
||||
double kernelSize_width = 1.0/(0.5*static_cast<double>(x2-x1)*1.4142+1); //sqrt(2)
|
||||
double kernelSize_height = 1.0/(0.5*static_cast<double>(y2-y1)*1.4142+1);
|
||||
|
||||
cv::Mat kernelWeight(img.rows, img.cols, CV_64FC1);
|
||||
for (int y = y1; y < y2+1; ++y){
|
||||
double * weightPtr = kernelWeight.ptr<double>(y);
|
||||
double tmp_y = std::pow((cy-y)*kernelSize_height, 2);
|
||||
for (int x = x1; x < x2+1; ++x){
|
||||
weightPtr[x] = kernelProfile_Epanechnikov(std::pow((cx-x)*kernelSize_width,2) + tmp_y);
|
||||
}
|
||||
}
|
||||
weights = kernelWeight;
|
||||
}
|
||||
//extract pixel values and compute histogram
|
||||
double rangePerBinInverse = static_cast<double>(m_numBinsPerDim)/256.0; // 1 / (imgRange/numBinsPerDim)
|
||||
double sum = 0;
|
||||
for (int y = y1; y < y2+1; ++y){
|
||||
std::vector<const uchar *> dataPtr(m_numDim);
|
||||
for (int dim = 0; dim < m_numDim; ++dim)
|
||||
dataPtr[dim] = imgChannels[dim].ptr<uchar>(y);
|
||||
const double * weightPtr = weights.ptr<double>(y);
|
||||
|
||||
for (int x = x1; x < x2+1; ++x){
|
||||
int id = 0;
|
||||
for (int dim = 0; dim < m_numDim; ++dim){
|
||||
id += p_dimIdCoef[dim]*cvFloor(rangePerBinInverse*dataPtr[dim][x]);
|
||||
}
|
||||
p_bins[id] += weightPtr[x];
|
||||
sum += weightPtr[x];
|
||||
}
|
||||
}
|
||||
//normalize
|
||||
sum = 1./sum;
|
||||
for(int i = 0; i < p_size; ++i)
|
||||
p_bins[i] *= sum;
|
||||
}
|
||||
|
||||
void Histogram::extractBackGroundHistogram(
|
||||
std::vector<cv::Mat> & imgChannels,
|
||||
int x1, int y1, int x2, int y2,
|
||||
int outer_x1, int outer_y1, int outer_x2, int outer_y2)
|
||||
{
|
||||
//extract pixel values and compute histogram
|
||||
double rangePerBinInverse = static_cast<double>(m_numBinsPerDim)/256.0; // 1 / (imgRange/numBinsPerDim)
|
||||
double sum = 0;
|
||||
for (int y = outer_y1; y < outer_y2; ++y){
|
||||
|
||||
std::vector<const uchar *> dataPtr(m_numDim);
|
||||
for (int dim = 0; dim < m_numDim; ++dim)
|
||||
dataPtr[dim] = imgChannels[dim].ptr<uchar>(y);
|
||||
|
||||
for (int x = outer_x1; x < outer_x2; ++x){
|
||||
if (x >= x1 && x <= x2 && y >= y1 && y <= y2)
|
||||
continue;
|
||||
|
||||
int id = 0;
|
||||
for (int dim = 0; dim < m_numDim; ++dim){
|
||||
id += p_dimIdCoef[dim]*cvFloor(rangePerBinInverse*dataPtr[dim][x]);
|
||||
}
|
||||
p_bins[id] += 1.0;
|
||||
sum += 1.0;
|
||||
}
|
||||
}
|
||||
//normalize
|
||||
sum = 1./sum;
|
||||
for(int i = 0; i < p_size; ++i)
|
||||
p_bins[i] *= sum;
|
||||
}
|
||||
|
||||
cv::Mat Histogram::backProject(std::vector<cv::Mat> & imgChannels)
|
||||
{
|
||||
//just for code clarity
|
||||
cv::Mat & img = imgChannels[0];
|
||||
|
||||
cv::Mat backProject(img.rows, img.cols, CV_64FC1);
|
||||
double rangePerBinInverse = static_cast<double>(m_numBinsPerDim)/256.0; // 1 / (imgRange/numBinsPerDim)
|
||||
|
||||
for (int y = 0; y < img.rows; ++y){
|
||||
double * backProjectPtr = backProject.ptr<double>(y);
|
||||
std::vector<const uchar *> dataPtr(m_numDim);
|
||||
for (int dim = 0; dim < m_numDim; ++dim)
|
||||
dataPtr[dim] = imgChannels[dim].ptr<uchar>(y);
|
||||
|
||||
for (int x = 0; x < img.cols; ++x){
|
||||
int id = 0;
|
||||
for (int dim = 0; dim < m_numDim; ++dim){
|
||||
id += p_dimIdCoef[dim]*cvFloor(rangePerBinInverse*dataPtr[dim][x]);
|
||||
}
|
||||
backProjectPtr[x] = p_bins[id];
|
||||
}
|
||||
}
|
||||
return backProject;
|
||||
}
|
||||
|
||||
// add new methods
|
||||
std::vector<double> Histogram::getHistogramVector() {
|
||||
return p_bins;
|
||||
}
|
||||
|
||||
void Histogram::setHistogramVector(double *vector) {
|
||||
for (size_t i=0; i<p_bins.size(); i++) {
|
||||
p_bins[i] = vector[i];
|
||||
}
|
||||
}
|
||||
|
||||
//-------------------- SEGMENT CLASS --------------------
|
||||
std::pair<cv::Mat, cv::Mat> Segment::computePosteriors(
|
||||
std::vector<cv::Mat> &imgChannels,
|
||||
int x1, int y1, int x2, int y2,
|
||||
cv::Mat weights, cv::Mat fgPrior, cv::Mat bgPrior,
|
||||
const Histogram &fgHistPrior, int numBinsPerChannel)
|
||||
{
|
||||
//preprocess and normalize all data
|
||||
CV_Assert(imgChannels.size() > 0);
|
||||
|
||||
//fit target to the image
|
||||
x1 = std::min(std::max(x1, 0), imgChannels[0].cols-1);
|
||||
y1 = std::min(std::max(y1, 0), imgChannels[0].rows-1);
|
||||
x2 = std::max(std::min(x2, imgChannels[0].cols-1), 0);
|
||||
y2 = std::max(std::min(y2, imgChannels[0].rows-1), 0);
|
||||
|
||||
//enlarge bbox by 1/3 of its size for background area
|
||||
int offsetX = (x2-x1)/3;
|
||||
int offsetY = (y2-y1)/3;
|
||||
int outer_y1 = std::max(0, (int)(y1-offsetY));
|
||||
int outer_y2 = std::min(imgChannels[0].rows, (int)(y2+offsetY+1));
|
||||
int outer_x1 = std::max(0, (int)(x1-offsetX));
|
||||
int outer_x2 = std::min(imgChannels[0].cols, (int)(x2+offsetX+1));
|
||||
|
||||
//extract histogram from original data -> more pixels better representation of distr. by histograms
|
||||
Histogram hist_target =
|
||||
(fgHistPrior.m_numBinsPerDim == numBinsPerChannel && (size_t)fgHistPrior.m_numDim == imgChannels.size())
|
||||
? fgHistPrior : Histogram(static_cast<int>(imgChannels.size()), numBinsPerChannel);
|
||||
Histogram hist_background(static_cast<int>(imgChannels.size()), numBinsPerChannel);
|
||||
if (weights.cols == 0)
|
||||
hist_target.extractForegroundHistogram(imgChannels, cv::Mat(), false, x1, y1, x2, y2);
|
||||
else
|
||||
hist_target.extractForegroundHistogram(imgChannels, weights, true, x1, y1, x2, y2);
|
||||
hist_background.extractBackGroundHistogram(imgChannels, x1, y1, x2, y2,
|
||||
outer_x1, outer_y1, outer_x2, outer_y2);
|
||||
|
||||
//compute resize factor so that the max area is 1000 (=avg. size ~ 32x32)
|
||||
double factor = sqrt(1000.0/((x2-x1)*(y2-y1)));
|
||||
if (factor > 1)
|
||||
factor = 1.0;
|
||||
cv::Size newSize(cvFloor((x2-x1)*factor), cvFloor((y2-y1)*factor));
|
||||
|
||||
//rescale input data
|
||||
cv::Rect roiRect_inner = cv::Rect(x1, y1, x2-x1, y2-y1);
|
||||
std::vector<cv::Mat> imgChannelsROI_inner(imgChannels.size());
|
||||
for (size_t i = 0; i < imgChannels.size(); ++i)
|
||||
cv::resize(imgChannels[i](roiRect_inner), imgChannelsROI_inner[i], newSize);
|
||||
|
||||
//initialize priors if there is no external source and rescale
|
||||
cv::Mat fgPriorScaled;
|
||||
if (fgPrior.cols == 0)
|
||||
fgPriorScaled = 0.5*cv::Mat::ones(newSize, CV_64FC1);
|
||||
else
|
||||
cv::resize(fgPrior(roiRect_inner), fgPriorScaled, newSize);
|
||||
cv::Mat bgPriorScaled;
|
||||
if (bgPrior.cols == 0)
|
||||
bgPriorScaled = 0.5*cv::Mat::ones(newSize, CV_64FC1);
|
||||
else
|
||||
cv::resize(bgPrior(roiRect_inner), bgPriorScaled, newSize);
|
||||
|
||||
//backproject pixels likelihood
|
||||
cv::Mat foregroundLikelihood = hist_target.backProject(imgChannelsROI_inner).mul(fgPriorScaled);
|
||||
cv::Mat backgroundLikelihood = hist_background.backProject(imgChannelsROI_inner).mul(bgPriorScaled);
|
||||
|
||||
double p_b = std::sqrt((std::pow(outer_x2-outer_x1, 2) + std::pow(outer_y2-outer_y1, 2)) /
|
||||
(std::pow(x2-x1, 2) + std::pow(y2-y1, 2))) ;
|
||||
double p_o = 1./(p_b + 1);
|
||||
|
||||
//convert likelihoods to posterior prob. (Bayes rule)
|
||||
cv::Mat prob_o(newSize, foregroundLikelihood.type());
|
||||
prob_o = p_o*foregroundLikelihood / (p_o*foregroundLikelihood + p_b*backgroundLikelihood);
|
||||
cv::Mat prob_b = 1.0 - prob_o;
|
||||
|
||||
std::pair<cv::Mat, cv::Mat> sizedProbs = getRegularizedSegmentation(prob_o, prob_b, fgPriorScaled, bgPriorScaled);
|
||||
|
||||
//resize probs to original size
|
||||
std::pair<cv::Mat, cv::Mat> probs;
|
||||
cv::resize(sizedProbs.first, probs.first, cv::Size(roiRect_inner.width, roiRect_inner.height));
|
||||
cv::resize(sizedProbs.second, probs.second, cv::Size(roiRect_inner.width, roiRect_inner.height));
|
||||
|
||||
return probs;
|
||||
}
|
||||
|
||||
std::pair<cv::Mat, cv::Mat> Segment::computePosteriors2(
|
||||
std::vector<cv::Mat> &imgChannels, int x1, int y1, int x2, int y2, double p_b,
|
||||
cv::Mat fgPrior, cv::Mat bgPrior, Histogram hist_target, Histogram hist_background)
|
||||
{
|
||||
//preprocess and normalize all data
|
||||
CV_Assert(imgChannels.size() > 0);
|
||||
|
||||
//fit target to the image
|
||||
x1 = std::min(std::max(x1, 0), imgChannels[0].cols-1);
|
||||
y1 = std::min(std::max(y1, 0), imgChannels[0].rows-1);
|
||||
x2 = std::max(std::min(x2, imgChannels[0].cols-1), 0);
|
||||
y2 = std::max(std::min(y2, imgChannels[0].rows-1), 0);
|
||||
|
||||
// calculate width and height of the region
|
||||
int w = x2 - x1 + 1;
|
||||
int h = y2 - y1 + 1;
|
||||
w = std::min(std::max(w, 1), imgChannels[0].cols);
|
||||
h = std::min(std::max(h, 1), imgChannels[0].rows);
|
||||
|
||||
//double p_o = 1./(p_b + 1);
|
||||
double p_o = 1. - p_b;
|
||||
|
||||
//compute resize factor so that the max area is 1000 (=avg. size ~ 32x32)
|
||||
double factor = sqrt(1000.0/(w*h));
|
||||
if (factor > 1)
|
||||
factor = 1.0;
|
||||
cv::Size newSize(cvFloor(w*factor), cvFloor(h*factor));
|
||||
|
||||
//rescale input data
|
||||
cv::Rect roiRect_inner = cv::Rect(x1, y1, w, h);
|
||||
std::vector<cv::Mat> imgChannelsROI_inner(imgChannels.size());
|
||||
for (size_t i = 0; i < imgChannels.size(); ++i)
|
||||
cv::resize(imgChannels[i](roiRect_inner), imgChannelsROI_inner[i], newSize);
|
||||
|
||||
//initialize priors if there is no external source and rescale
|
||||
cv::Mat fgPriorScaled;
|
||||
if (fgPrior.cols == 0)
|
||||
fgPriorScaled = 0.5*cv::Mat::ones(newSize, CV_64FC1);
|
||||
else
|
||||
cv::resize(fgPrior(roiRect_inner), fgPriorScaled, newSize);
|
||||
cv::Mat bgPriorScaled;
|
||||
if (bgPrior.cols == 0)
|
||||
bgPriorScaled = 0.5*cv::Mat::ones(newSize, CV_64FC1);
|
||||
else
|
||||
cv::resize(bgPrior(roiRect_inner), bgPriorScaled, newSize);
|
||||
|
||||
//backproject pixels likelihood
|
||||
cv::Mat foregroundLikelihood = hist_target.backProject(imgChannelsROI_inner).mul(fgPriorScaled);
|
||||
cv::Mat backgroundLikelihood = hist_background.backProject(imgChannelsROI_inner).mul(bgPriorScaled);
|
||||
|
||||
//convert likelihoods to posterior prob. (Bayes rule)
|
||||
cv::Mat prob_o(newSize, foregroundLikelihood.type());
|
||||
prob_o = p_o*foregroundLikelihood / (p_o*foregroundLikelihood + p_b*backgroundLikelihood);
|
||||
cv::Mat prob_b = 1.0 - prob_o;
|
||||
|
||||
std::pair<cv::Mat, cv::Mat> sizedProbs = getRegularizedSegmentation(prob_o, prob_b,
|
||||
fgPriorScaled, bgPriorScaled);
|
||||
//std::pair<cv::Mat, cv::Mat> sizedProbs = std::pair<cv::Mat, cv::Mat>(prob_o, prob_b);
|
||||
|
||||
//resize probs to original size
|
||||
std::pair<cv::Mat, cv::Mat> probs;
|
||||
cv::resize(sizedProbs.first, probs.first, cv::Size(roiRect_inner.width, roiRect_inner.height));
|
||||
cv::resize(sizedProbs.second, probs.second, cv::Size(roiRect_inner.width, roiRect_inner.height));
|
||||
|
||||
return probs;
|
||||
}
|
||||
|
||||
std::pair<cv::Mat, cv::Mat> Segment::computePosteriors2(std::vector<cv::Mat> &imgChannels,
|
||||
cv::Mat fgPrior, cv::Mat bgPrior, Histogram hist_target, Histogram hist_background)
|
||||
{
|
||||
//preprocess and normalize all data
|
||||
CV_Assert(imgChannels.size() > 0);
|
||||
|
||||
//fit target to the image
|
||||
int x1 = 0;
|
||||
int y1 = 0;
|
||||
int x2 = imgChannels[0].cols-1;
|
||||
int y2 = imgChannels[0].rows-1;
|
||||
|
||||
//compute resize factor so that we control the max area ~32^2
|
||||
double factor = sqrt(1000./((x2-x1)*(y2-y1)));
|
||||
//double factor = 1;
|
||||
if (factor > 1)
|
||||
factor = 1.0;
|
||||
cv::Size newSize(cvFloor((x2-x1)*factor), cvFloor((y2-y1)*factor));
|
||||
|
||||
//rescale input data
|
||||
cv::Rect roiRect_inner = cv::Rect(x1, y1, x2-x1+1, y2-y1+1);
|
||||
std::vector<cv::Mat> imgChannelsROI_inner(imgChannels.size());
|
||||
for (size_t i = 0; i < imgChannels.size(); ++i)
|
||||
cv::resize(imgChannels[i](roiRect_inner), imgChannelsROI_inner[i], newSize);
|
||||
|
||||
//initialize priors if there is no external source and rescale
|
||||
cv::Mat fgPriorScaled;
|
    if (fgPrior.cols == 0)
        fgPriorScaled = 0.5*cv::Mat::ones(newSize, CV_64FC1);
    else
        cv::resize(fgPrior(roiRect_inner), fgPriorScaled, newSize);

    cv::Mat bgPriorScaled;
    if (bgPrior.cols == 0)
        bgPriorScaled = 0.5*cv::Mat::ones(newSize, CV_64FC1);
    else
        cv::resize(bgPrior(roiRect_inner), bgPriorScaled, newSize);

    // backproject the pixel likelihoods
    cv::Mat foregroundLikelihood = hist_target.backProject(imgChannelsROI_inner).mul(fgPriorScaled);
    cv::Mat backgroundLikelihood = hist_background.backProject(imgChannelsROI_inner).mul(bgPriorScaled);

    // prior for the posterior, relative to the number of pixels in bg and fg
    double p_b = 5./3.;
    double p_o = 1./(p_b + 1);

    // convert likelihoods to posterior probabilities (Bayes rule)
    cv::Mat prob_o(newSize, foregroundLikelihood.type());
    prob_o = p_o*foregroundLikelihood / (p_o*foregroundLikelihood + p_b*backgroundLikelihood);
    cv::Mat prob_b = 1.0 - prob_o;

    std::pair<cv::Mat, cv::Mat> sizedProbs = getRegularizedSegmentation(prob_o, prob_b, fgPriorScaled, bgPriorScaled);

    // resize the probabilities back to the original size
    std::pair<cv::Mat, cv::Mat> probs;
    cv::resize(sizedProbs.first, probs.first, cv::Size(roiRect_inner.width, roiRect_inner.height));
    cv::resize(sizedProbs.second, probs.second, cv::Size(roiRect_inner.width, roiRect_inner.height));

    return probs;
}

std::pair<cv::Mat, cv::Mat> Segment::getRegularizedSegmentation(
        cv::Mat &prob_o, cv::Mat &prob_b, cv::Mat & prior_o, cv::Mat & prior_b)
{
    int hsize = cvFloor(std::max(1.0, (double)cvFloor(static_cast<double>(prob_b.cols)*3./50. + 0.5)));
    int lambdaSize = hsize*2+1;

    // compute the Gaussian kernel
    cv::Mat lambda(lambdaSize, lambdaSize, CV_64FC1);
    double std2 = std::pow(hsize/3.0, 2);
    double sumLambda = 0.0;
    for (int y = -hsize; y < hsize + 1; ++y){
        double * lambdaPtr = lambda.ptr<double>(y+hsize);
        double tmp_y = y*y;
        for (int x = -hsize; x < hsize +1; ++x){
            double tmp_gauss = gaussian(x*x, tmp_y, std2);
            lambdaPtr[x+hsize] = tmp_gauss;
            sumLambda += tmp_gauss;
        }
    }
    sumLambda -= lambda.at<double>(hsize, hsize);
    // set the center of the kernel to 0
    lambda.at<double>(hsize, hsize) = 0.0;
    sumLambda = 1.0/sumLambda;
    // normalize the kernel so that it sums to 1
    lambda = lambda*sumLambda;

    // create the lambda2 kernel
    cv::Mat lambda2 = lambda.clone();
    lambda2.at<double>(hsize, hsize) = 1.0;

    double terminateThr = 1e-1;
    double logLike = std::numeric_limits<double>::max();
    int maxIter = 50;

    // return values
    cv::Mat Qsum_o(prior_o.rows, prior_o.cols, prior_o.type());
    cv::Mat Qsum_b(prior_o.rows, prior_o.cols, prior_o.type());

    // temporaries used by the algorithm
    cv::Mat Si_o(prior_o.rows, prior_o.cols, prior_o.type());
    cv::Mat Si_b(prior_o.rows, prior_o.cols, prior_o.type());
    cv::Mat Ssum_o(prior_o.rows, prior_o.cols, prior_o.type());
    cv::Mat Ssum_b(prior_o.rows, prior_o.cols, prior_o.type());
    cv::Mat Qi_o(prior_o.rows, prior_o.cols, prior_o.type());
    cv::Mat Qi_b(prior_o.rows, prior_o.cols, prior_o.type());
    cv::Mat logQo(prior_o.rows, prior_o.cols, prior_o.type());
    cv::Mat logQb(prior_o.rows, prior_o.cols, prior_o.type());

    int i;
    for (i = 0; i < maxIter; ++i){
        // follows the equations from Kristan et al., ACCV 2014,
        // "A graphical model for rapid obstacle image-map estimation from unmanned surface vehicles"
        cv::Mat P_Io = prior_o.mul(prob_o) + std::numeric_limits<double>::epsilon();
        cv::Mat P_Ib = prior_b.mul(prob_b) + std::numeric_limits<double>::epsilon();

        cv::filter2D(prior_o, Si_o, -1, lambda, cv::Point(-1, -1), 0, cv::BORDER_REFLECT);
        cv::filter2D(prior_b, Si_b, -1, lambda, cv::Point(-1, -1), 0, cv::BORDER_REFLECT);
        Si_o = Si_o.mul(prior_o);
        Si_b = Si_b.mul(prior_b);
        cv::Mat normSi = 1.0/(Si_o + Si_b);
        Si_o = Si_o.mul(normSi);
        Si_b = Si_b.mul(normSi);
        cv::filter2D(Si_o, Ssum_o, -1, lambda2, cv::Point(-1, -1), 0, cv::BORDER_REFLECT);
        cv::filter2D(Si_b, Ssum_b, -1, lambda2, cv::Point(-1, -1), 0, cv::BORDER_REFLECT);

        cv::filter2D(P_Io, Qi_o, -1, lambda, cv::Point(-1, -1), 0, cv::BORDER_REFLECT);
        cv::filter2D(P_Ib, Qi_b, -1, lambda, cv::Point(-1, -1), 0, cv::BORDER_REFLECT);
        Qi_o = Qi_o.mul(P_Io);
        Qi_b = Qi_b.mul(P_Ib);
        cv::Mat normQi = 1.0/(Qi_o + Qi_b);
        Qi_o = Qi_o.mul(normQi);
        Qi_b = Qi_b.mul(normQi);
        cv::filter2D(Qi_o, Qsum_o, -1, lambda2, cv::Point(-1, -1), 0, cv::BORDER_REFLECT);
        cv::filter2D(Qi_b, Qsum_b, -1, lambda2, cv::Point(-1, -1), 0, cv::BORDER_REFLECT);

        prior_o = (Qsum_o + Ssum_o)*0.25;
        prior_b = (Qsum_b + Ssum_b)*0.25;
        cv::Mat normPI = 1.0/(prior_o + prior_b);
        prior_o = prior_o.mul(normPI);
        prior_b = prior_b.mul(normPI);

        // converged?
        cv::log(Qsum_o, logQo);
        cv::log(Qsum_b, logQb);
        cv::Scalar mean = cv::sum(logQo+logQb);
        double logLikeNew = -mean.val[0]/(2*Qsum_o.rows*Qsum_o.cols);
        if (std::abs(logLike - logLikeNew) < terminateThr)
            break;
        logLike = logLikeNew;
    }
    return std::pair<cv::Mat, cv::Mat>(Qsum_o, Qsum_b);
}

} //cv namespace
//---------------------------------------------------------------------------------------------------------------------
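As a quick numeric illustration of the Bayes step above (the values are made up, not from the patch): with p_o = 3/8 and p_b = 5/3, a pixel with foreground likelihood 0.8 and background likelihood 0.2 gets prob_o = 0.375*0.8 / (0.375*0.8 + 1.667*0.2), which is about 0.47. In other words, the fixed priors deliberately weight the posterior towards the larger background region, so only pixels that match the foreground histogram much better than the background one end up above 0.5.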
61
modules/video/src/tracking/tracker_csrt_segmentation.hpp
Normal file
@ -0,0 +1,61 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.

#ifndef OPENCV_TRACKER_CSRT_SEGMENTATION
#define OPENCV_TRACKER_CSRT_SEGMENTATION

#include "opencv2/core/mat.hpp"

namespace cv
{
class Histogram
{
public:
    int m_numBinsPerDim;
    int m_numDim;

    Histogram() : m_numBinsPerDim(0), m_numDim(0) {}
    Histogram(int numDimensions, int numBinsPerDimension = 8);
    void extractForegroundHistogram(std::vector<cv::Mat> & imgChannels,
            cv::Mat weights, bool useMatWeights, int x1, int y1, int x2, int y2);
    void extractBackGroundHistogram(std::vector<cv::Mat> & imgChannels,
            int x1, int y1, int x2, int y2, int outer_x1, int outer_y1,
            int outer_x2, int outer_y2);
    cv::Mat backProject(std::vector<cv::Mat> & imgChannels);
    std::vector<double> getHistogramVector();
    void setHistogramVector(double *vector);

private:
    int p_size;
    std::vector<double> p_bins;
    std::vector<int> p_dimIdCoef;

    inline double kernelProfile_Epanechnikov(double x)
        { return (x <= 1) ? (2.0/CV_PI)*(1-x) : 0; }
};


class Segment
{
public:
    static std::pair<cv::Mat, cv::Mat> computePosteriors(std::vector<cv::Mat> & imgChannels,
            int x1, int y1, int x2, int y2, cv::Mat weights, cv::Mat fgPrior,
            cv::Mat bgPrior, const Histogram &fgHistPrior, int numBinsPerChannel = 16);
    static std::pair<cv::Mat, cv::Mat> computePosteriors2(std::vector<cv::Mat> & imgChannels,
            int x1, int y1, int x2, int y2, double p_b, cv::Mat fgPrior,
            cv::Mat bgPrior, Histogram hist_target, Histogram hist_background);
    static std::pair<cv::Mat, cv::Mat> computePosteriors2(std::vector<cv::Mat> &imgChannels,
            cv::Mat fgPrior, cv::Mat bgPrior, Histogram hist_target, Histogram hist_background);

private:
    static std::pair<cv::Mat, cv::Mat> getRegularizedSegmentation(cv::Mat & prob_o,
            cv::Mat & prob_b, cv::Mat &prior_o, cv::Mat &prior_b);

    inline static double gaussian(double x2, double y2, double std2){
        return exp(-(x2 + y2)/(2*std2))/(2*CV_PI*std2);
    }
};

}//cv namespace
#endif
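To make the relationship between Histogram and Segment concrete, here is a rough usage sketch. It is illustrative only and not code from this patch; the function name, the HSV patch, the rectangle coordinates and the bin counts are assumptions.

// Illustrative sketch: build fg/bg colour histograms for a target rectangle
// inside a patch and turn them into a per-pixel foreground probability map.
#include <opencv2/core.hpp>
#include "tracker_csrt_segmentation.hpp"   // private module header, assumed visible

static cv::Mat foreground_probability(const cv::Mat &patch_hsv,
                                      int x1, int y1, int x2, int y2)
{
    std::vector<cv::Mat> channels;
    cv::split(patch_hsv, channels);

    cv::Histogram hist_fg(3, 16), hist_bg(3, 16);   // 3 channels, 16 bins each
    hist_fg.extractForegroundHistogram(channels, cv::Mat(), false, x1, y1, x2, y2);
    hist_bg.extractBackGroundHistogram(channels, x1, y1, x2, y2,
                                       0, 0, patch_hsv.cols, patch_hsv.rows);

    // Empty priors fall back to a flat 0.5 prior inside computePosteriors2.
    std::pair<cv::Mat, cv::Mat> probs = cv::Segment::computePosteriors2(
            channels, cv::Mat(), cv::Mat(), hist_fg, hist_bg);
    return probs.first;   // foreground posterior per pixel
}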
563
modules/video/src/tracking/tracker_csrt_utils.cpp
Normal file
@ -0,0 +1,563 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.

#include "../precomp.hpp"

#include "tracker_csrt_utils.hpp"

namespace cv {

Mat circshift(Mat matrix, int dx, int dy)
{
    Mat matrix_out = matrix.clone();
    int idx_y = 0;
    int idx_x = 0;
    for(int i=0; i<matrix.rows; i++) {
        for(int j=0; j<matrix.cols; j++) {
            idx_y = modul(i+dy+1, matrix.rows);
            idx_x = modul(j+dx+1, matrix.cols);
            matrix_out.at<float>(idx_y, idx_x) = matrix.at<float>(i,j);
        }
    }
    return matrix_out;
}

Mat gaussian_shaped_labels(const float sigma, const int w, const int h)
{
    // create a 2D Gaussian peak, convert it to the Fourier domain and store it in yf
    Mat y = Mat::zeros(h, w, CV_32F);
    float w2 = static_cast<float>(cvFloor(w / 2));
    float h2 = static_cast<float>(cvFloor(h / 2));

    // calculate the value for each pixel separately
    for(int i=0; i<y.rows; i++) {
        for(int j=0; j<y.cols; j++) {
            y.at<float>(i,j) = (float)exp((-0.5 / pow(sigma, 2)) * (pow((i+1-h2), 2) + pow((j+1-w2), 2)));
        }
    }
    // wrap around with a circular shift
    y = circshift(y, -cvFloor(y.cols / 2), -cvFloor(y.rows / 2));
    Mat yf;
    dft(y, yf, DFT_COMPLEX_OUTPUT);
    return yf;
}

std::vector<Mat> fourier_transform_features(const std::vector<Mat> &M)
{
    std::vector<Mat> out(M.size());
    Mat channel;
    // iterate over the channels and convert them to the Fourier domain
    for(size_t k = 0; k < M.size(); k++) {
        M[k].convertTo(channel, CV_32F);
        dft(channel, channel, DFT_COMPLEX_OUTPUT);
        out[k] = (channel);
    }
    return out;
}

Mat divide_complex_matrices(const Mat &A, const Mat &B)
{
    std::vector<Mat> va, vb;
    split(A, va);
    split(B, vb);

    Mat a = va.at(0);
    Mat b = va.at(1);
    Mat c = vb.at(0);
    Mat d = vb.at(1);

    Mat div = c.mul(c) + d.mul(d);
    Mat real_part = (a.mul(c) + b.mul(d));
    Mat im_part = (b.mul(c) - a.mul(d));
    divide(real_part, div, real_part);
    divide(im_part, div, im_part);

    std::vector<Mat> tmp(2);
    tmp[0] = real_part;
    tmp[1] = im_part;
    Mat res;
    merge(tmp, res);
    return res;
}

Mat get_subwindow(
    const Mat &image,
    const Point2f center,
    const int w,
    const int h,
    Rect *valid_pixels)
{
    int startx = cvFloor(center.x) + 1 - (cvFloor(w/2));
    int starty = cvFloor(center.y) + 1 - (cvFloor(h/2));
    Rect roi(startx, starty, w, h);
    int padding_left = 0, padding_right = 0, padding_top = 0, padding_bottom = 0;
    if(roi.x < 0) {
        padding_left = -roi.x;
        roi.x = 0;
    }
    if(roi.y < 0) {
        padding_top = -roi.y;
        roi.y = 0;
    }
    roi.width -= padding_left;
    roi.height -= padding_top;
    if(roi.x + roi.width >= image.cols) {
        padding_right = roi.x + roi.width - image.cols;
        roi.width = image.cols - roi.x;
    }
    if(roi.y + roi.height >= image.rows) {
        padding_bottom = roi.y + roi.height - image.rows;
        roi.height = image.rows - roi.y;
    }
    Mat subwin = image(roi).clone();
    copyMakeBorder(subwin, subwin, padding_top, padding_bottom, padding_left, padding_right, BORDER_REPLICATE);

    if(valid_pixels != NULL) {
        *valid_pixels = Rect(padding_left, padding_top, roi.width, roi.height);
    }
    return subwin;
}

float subpixel_peak(const Mat &response, const std::string &s, const Point2f &p)
{
    int i_p0, i_p_l, i_p_r;     // indexes in response
    float p0, p_l, p_r;         // values in response

    if(s.compare("vertical") == 0) {
        // neighbouring rows
        i_p0 = cvRound(p.y);
        i_p_l = modul(cvRound(p.y) - 1, response.rows);
        i_p_r = modul(cvRound(p.y) + 1, response.rows);
        int px = static_cast<int>(p.x);
        p0 = response.at<float>(i_p0, px);
        p_l = response.at<float>(i_p_l, px);
        p_r = response.at<float>(i_p_r, px);
    } else if(s.compare("horizontal") == 0) {
        // neighbouring cols
        i_p0 = cvRound(p.x);
        i_p_l = modul(cvRound(p.x) - 1, response.cols);
        i_p_r = modul(cvRound(p.x) + 1, response.cols);
        int py = static_cast<int>(p.y);
        p0 = response.at<float>(py, i_p0);
        p_l = response.at<float>(py, i_p_l);
        p_r = response.at<float>(py, i_p_r);
    } else {
        std::cout << "Warning: unknown subpixel peak direction!" << std::endl;
        return 0;
    }
    float delta = 0.5f * (p_r - p_l) / (2*p0 - p_r - p_l);
    if(!std::isfinite(delta)) {
        delta = 0;
    }

    return delta;
}

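subpixel_peak fits a parabola through the peak and its two neighbours and returns the offset of the parabola's vertex. A quick check with made-up values: for p_l = 0.2, p0 = 1.0 and p_r = 0.6 the offset is 0.5*(0.6 - 0.2)/(2*1.0 - 0.6 - 0.2) = 0.2/1.2, roughly 0.17 of a cell towards the larger neighbour, which is the sub-pixel refinement the tracker applies to the response map maximum.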
inline float chebpoly(const int n, const float x)
{
    float res;
    if (fabs(x) <= 1)
        res = cos(n*acos(x));
    else
        res = cosh(n*acosh(x));
    return res;
}

static Mat chebwin(int N, const float atten)
{
    Mat out(N, 1, CV_32FC1);
    int nn, i;
    float M, n, sum = 0, max = 0;
    float tg = static_cast<float>(pow(10, atten/20.0f));  /* 1/r term [2], 10^gamma [2] */
    float x0 = cosh((1.0f/(N-1))*acosh(tg));
    M = (N-1)/2.0f;
    if(N%2==0)
        M = M + 0.5f;  /* handle even length windows */
    for(nn=0; nn<(N/2+1); nn++) {
        n = nn-M;
        sum = 0;
        for(i=1; i<=M; i++){
            sum += chebpoly(N-1, x0*static_cast<float>(cos(CV_PI*i/N))) *
                    static_cast<float>(cos(2.0f*n*CV_PI*i/N));
        }
        out.at<float>(nn,0) = tg + 2*sum;
        out.at<float>(N-nn-1,0) = out.at<float>(nn,0);
        if(out.at<float>(nn,0) > max)
            max = out.at<float>(nn,0);
    }
    for(nn=0; nn<N; nn++)
        out.at<float>(nn,0) /= max;  /* normalize everything */

    return out;
}


static double modified_bessel(int order, double x)
{
    // sum m=0:inf 1/(m! * Gamma(m + order + 1)) * (x/2)^(2m + order)
    const double eps = 1e-13;
    double result = 0;
    double m = 0;
    double gamma = 1.0;
    for(int i = 2; i <= order; ++i)
        gamma *= i;
    double term = pow(x,order) / (pow(2,order) * gamma);

    while(term > eps * result) {
        result += term;
        // calculate the next term of the series
        ++m;
        term *= (x*x) / (4*m*(m+order));
    }
    return result;
}

Mat get_hann_win(Size sz)
{
    Mat hann_rows = Mat::ones(sz.height, 1, CV_32F);
    Mat hann_cols = Mat::ones(1, sz.width, CV_32F);
    int NN = sz.height - 1;
    if(NN != 0) {
        for (int i = 0; i < hann_rows.rows; ++i) {
            hann_rows.at<float>(i,0) = (float)(1.0/2.0 * (1.0 - cos(2*CV_PI*i/NN)));
        }
    }
    NN = sz.width - 1;
    if(NN != 0) {
        for (int i = 0; i < hann_cols.cols; ++i) {
            hann_cols.at<float>(0,i) = (float)(1.0/2.0 * (1.0 - cos(2*CV_PI*i/NN)));
        }
    }
    return hann_rows * hann_cols;
}

Mat get_kaiser_win(Size sz, float alpha)
{
    Mat kaiser_rows = Mat::ones(sz.height, 1, CV_32F);
    Mat kaiser_cols = Mat::ones(1, sz.width, CV_32F);

    int N = sz.height - 1;
    double shape = alpha;
    double den = 1.0 / modified_bessel(0, shape);

    for(int n = 0; n <= N; ++n) {
        double K = (2.0 * n * 1.0/N) - 1.0;
        double x = sqrt(1.0 - (K * K));
        kaiser_rows.at<float>(n,0) = static_cast<float>(modified_bessel(0, shape * x) * den);
    }

    N = sz.width - 1;
    for(int n = 0; n <= N; ++n) {
        double K = (2.0 * n * 1.0/N) - 1.0;
        double x = sqrt(1.0 - (K * K));
        kaiser_cols.at<float>(0,n) = static_cast<float>(modified_bessel(0, shape * x) * den);
    }

    return kaiser_rows * kaiser_cols;
}

Mat get_chebyshev_win(Size sz, float attenuation)
{
    Mat cheb_rows = chebwin(sz.height, attenuation);
    Mat cheb_cols = chebwin(sz.width, attenuation).t();
    return cheb_rows * cheb_cols;
}

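All three window helpers build a separable 2D window as the outer product of a column profile and a row profile, so the returned Mat has exactly the requested size. A small hedged usage sketch follows; the feature channel and its size are illustrative assumptions, not part of the patch.

// Illustrative only: weight a CV_32F feature channel by a Hann window of the
// same size, the usual way correlation-filter trackers suppress boundary effects.
#include <opencv2/core.hpp>
#include "tracker_csrt_utils.hpp"   // private module header, assumed visible

static cv::Mat window_a_channel()
{
    cv::Mat feature = cv::Mat::ones(48, 64, CV_32F);      // placeholder channel
    cv::Mat window  = cv::get_hann_win(cv::Size(64, 48)); // 48x64, CV_32F
    return feature.mul(window);                           // element-wise weighting
}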
static void computeHOG32D(const Mat &imageM, Mat &featM, const int sbin, const int pad_x, const int pad_y)
{
    const int dimHOG = 32;
    CV_Assert(pad_x >= 0);
    CV_Assert(pad_y >= 0);
    CV_Assert(imageM.channels() == 3);
    CV_Assert(imageM.depth() == CV_64F);

    // epsilon to avoid division by zero
    const double eps = 0.0001;
    // number of orientations
    const int numOrient = 18;
    // unit vectors to compute gradient orientation
    const double uu[9] = {1.000, 0.9397, 0.7660, 0.5000, 0.1736, -0.1736, -0.5000, -0.7660, -0.9397};
    const double vv[9] = {0.000, 0.3420, 0.6428, 0.8660, 0.9848, 0.9848, 0.8660, 0.6428, 0.3420};

    // image size
    const Size imageSize = imageM.size();
    // block size
    // int bW = cvRound((double)imageSize.width/(double)sbin);
    // int bH = cvRound((double)imageSize.height/(double)sbin);
    int bW = cvFloor((double)imageSize.width/(double)sbin);
    int bH = cvFloor((double)imageSize.height/(double)sbin);
    const Size blockSize(bW, bH);
    // size of the HOG feature map
    int oW = max(blockSize.width-2, 0) + 2*pad_x;
    int oH = max(blockSize.height-2, 0) + 2*pad_y;
    Size outSize = Size(oW, oH);
    // size of the visible region
    const Size visible = blockSize*sbin;

    // initialize the histogram, norm and output feature matrices
    Mat histM = Mat::zeros(Size(blockSize.width*numOrient, blockSize.height), CV_64F);
    Mat normM = Mat::zeros(Size(blockSize.width, blockSize.height), CV_64F);
    featM = Mat::zeros(Size(outSize.width*dimHOG, outSize.height), CV_64F);

    // get the stride of each matrix
    const size_t imStride = imageM.step1();
    const size_t histStride = histM.step1();
    const size_t normStride = normM.step1();
    const size_t featStride = featM.step1();

    // calculate the zero offset
    const double* im = imageM.ptr<double>(0);
    double* const hist = histM.ptr<double>(0);
    double* const norm = normM.ptr<double>(0);
    double* const feat = featM.ptr<double>(0);

    for (int y = 1; y < visible.height - 1; y++)
    {
        for (int x = 1; x < visible.width - 1; x++)
        {
            // OpenCV uses an interleaved format: BGR-BGR-BGR
            const double* s = im + 3*min(x, imageM.cols-2) + min(y, imageM.rows-2)*imStride;

            // blue image channel
            double dyb = *(s+imStride) - *(s-imStride);
            double dxb = *(s+3) - *(s-3);
            double vb = dxb*dxb + dyb*dyb;

            // green image channel
            s += 1;
            double dyg = *(s+imStride) - *(s-imStride);
            double dxg = *(s+3) - *(s-3);
            double vg = dxg*dxg + dyg*dyg;

            // red image channel
            s += 1;
            double dy = *(s+imStride) - *(s-imStride);
            double dx = *(s+3) - *(s-3);
            double v = dx*dx + dy*dy;

            // pick the channel with the strongest gradient
            if (vg > v) { v = vg; dx = dxg; dy = dyg; }
            if (vb > v) { v = vb; dx = dxb; dy = dyb; }

            // snap to one of the 18 orientations
            double best_dot = 0;
            int best_o = 0;
            for (int o = 0; o < (int)numOrient/2; o++)
            {
                double dot = uu[o]*dx + vv[o]*dy;
                if (dot > best_dot)
                {
                    best_dot = dot;
                    best_o = o;
                }
                else if (-dot > best_dot)
                {
                    best_dot = -dot;
                    best_o = o + (int)(numOrient/2);
                }
            }

            // add to the 4 histograms around the pixel using bilinear interpolation
            double yp = ((double)y+0.5)/(double)sbin - 0.5;
            double xp = ((double)x+0.5)/(double)sbin - 0.5;
            int iyp = (int)cvFloor(yp);
            int ixp = (int)cvFloor(xp);
            double vy0 = yp - iyp;
            double vx0 = xp - ixp;
            double vy1 = 1.0 - vy0;
            double vx1 = 1.0 - vx0;
            v = sqrt(v);

            // fill the value into the 4 neighborhood cells
            if (iyp >= 0 && ixp >= 0)
                *(hist + iyp*histStride + ixp*numOrient + best_o) += vy1*vx1*v;

            if (iyp >= 0 && ixp+1 < blockSize.width)
                *(hist + iyp*histStride + (ixp+1)*numOrient + best_o) += vx0*vy1*v;

            if (iyp+1 < blockSize.height && ixp >= 0)
                *(hist + (iyp+1)*histStride + ixp*numOrient + best_o) += vy0*vx1*v;

            if (iyp+1 < blockSize.height && ixp+1 < blockSize.width)
                *(hist + (iyp+1)*histStride + (ixp+1)*numOrient + best_o) += vy0*vx0*v;

        } // for x
    } // for y

    // compute the energy in each block by summing over orientations
    for (int y = 0; y < blockSize.height; y++)
    {
        const double* src = hist + y*histStride;
        double* dst = norm + y*normStride;
        double const* const dst_end = dst + blockSize.width;
        // for each cell
        while (dst < dst_end)
        {
            *dst = 0;
            for (int o = 0; o < (int)(numOrient/2); o++)
            {
                *dst += (*src + *(src + numOrient/2))*
                        (*src + *(src + numOrient/2));
                src++;
            }
            dst++;
            src += numOrient/2;
        }
    }

    // compute the features
    for (int y = pad_y; y < outSize.height - pad_y; y++)
    {
        for (int x = pad_x; x < outSize.width - pad_x; x++)
        {
            double* dst = feat + y*featStride + x*dimHOG;
            double* p, n1, n2, n3, n4;
            const double* src;

            p = norm + (y - pad_y + 1)*normStride + (x - pad_x + 1);
            n1 = 1.0f / sqrt(*p + *(p + 1) + *(p + normStride) + *(p + normStride + 1) + eps);
            p = norm + (y - pad_y)*normStride + (x - pad_x + 1);
            n2 = 1.0f / sqrt(*p + *(p + 1) + *(p + normStride) + *(p + normStride + 1) + eps);
            p = norm + (y - pad_y + 1)*normStride + x - pad_x;
            n3 = 1.0f / sqrt(*p + *(p + 1) + *(p + normStride) + *(p + normStride + 1) + eps);
            p = norm + (y - pad_y)*normStride + x - pad_x;
            n4 = 1.0f / sqrt(*p + *(p + 1) + *(p + normStride) + *(p + normStride + 1) + eps);

            double t1 = 0.0, t2 = 0.0, t3 = 0.0, t4 = 0.0;

            // contrast-sensitive features
            src = hist + (y - pad_y + 1)*histStride + (x - pad_x + 1)*numOrient;
            for (int o = 0; o < numOrient; o++)
            {
                double val = *src;
                double h1 = min(val*n1, 0.2);
                double h2 = min(val*n2, 0.2);
                double h3 = min(val*n3, 0.2);
                double h4 = min(val*n4, 0.2);
                *(dst++) = 0.5 * (h1 + h2 + h3 + h4);

                src++;
                t1 += h1;
                t2 += h2;
                t3 += h3;
                t4 += h4;
            }

            // contrast-insensitive features
            src = hist + (y - pad_y + 1)*histStride + (x - pad_x + 1)*numOrient;
            for (int o = 0; o < numOrient/2; o++)
            {
                double sum = *src + *(src + numOrient/2);
                double h1 = min(sum * n1, 0.2);
                double h2 = min(sum * n2, 0.2);
                double h3 = min(sum * n3, 0.2);
                double h4 = min(sum * n4, 0.2);
                *(dst++) = 0.5 * (h1 + h2 + h3 + h4);
                src++;
            }

            // texture features
            *(dst++) = 0.2357 * t1;
            *(dst++) = 0.2357 * t2;
            *(dst++) = 0.2357 * t3;
            *(dst++) = 0.2357 * t4;
            // truncation feature
            *dst = 0;
        } // for x
    } // for y
    // truncation features on the padded border
    for (int m = 0; m < featM.rows; m++)
    {
        for (int n = 0; n < featM.cols; n += dimHOG)
        {
            if (m > pad_y - 1 && m < featM.rows - pad_y && n > pad_x*dimHOG - 1 && n < featM.cols - pad_x*dimHOG)
                continue;

            featM.at<double>(m, n + dimHOG - 1) = 1;
        } // for n
    } // for m
}

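The 32 values per HOG cell break down as 18 contrast-sensitive orientation bins, 9 contrast-insensitive bins, 4 texture (gradient-energy) features and 1 truncation feature, i.e. 18 + 9 + 4 + 1 = 32, which is why dimHOG is fixed to 32 above.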
std::vector<Mat> get_features_hog(const Mat &im, const int bin_size)
{
    Mat hogmatrix;
    Mat im_;
    im.convertTo(im_, CV_64FC3, 1.0/255.0);
    computeHOG32D(im_, hogmatrix, bin_size, 1, 1);
    hogmatrix.convertTo(hogmatrix, CV_32F);
    Size hog_size = im.size();
    hog_size.width /= bin_size;
    hog_size.height /= bin_size;
    Mat hogc(hog_size, CV_32FC(32), hogmatrix.data);
    std::vector<Mat> features;
    split(hogc, features);
    return features;
}

// std::vector<Mat> get_features_cn(const Mat &ppatch_data, const Size &output_size) {
//     Mat patch_data = ppatch_data.clone();
//     Vec3b & pixel = patch_data.at<Vec3b>(0,0);
//     unsigned index;

//     Mat cnFeatures = Mat::zeros(patch_data.rows, patch_data.cols, CV_32FC(10));

//     for(int i=0; i<patch_data.rows; i++){
//         for(int j=0; j<patch_data.cols; j++){
//             pixel = patch_data.at<Vec3b>(i,j);
//             index = (unsigned)(cvFloor((float)pixel[2]/8) + 32*cvFloor((float)pixel[1]/8) + 32*32*cvFloor((float)pixel[0]/8));

//             //copy the values
//             for(int k=0; k<10; k++){
//                 cnFeatures.at<Vec<float,10> >(i,j)[k] = (float)ColorNames[index][k];
//             }
//         }
//     }
//     std::vector<Mat> result;
//     split(cnFeatures, result);
//     for (size_t i = 0; i < result.size(); i++) {
//         if (output_size.width > 0 && output_size.height > 0) {
//             resize(result.at(i), result.at(i), output_size, INTER_CUBIC);
//         }
//     }
//     return result;
// }

std::vector<Mat> get_features_rgb(const Mat &patch, const Size &output_size)
{
    std::vector<Mat> channels;
    split(patch, channels);
    for(size_t k=0; k<channels.size(); k++) {
        channels[k].convertTo(channels[k], CV_32F, 1.0/255.0, -0.5);
        channels[k] = channels[k] - mean(channels[k])[0];
        resize(channels[k], channels[k], output_size, INTER_CUBIC);
    }
    return channels;
}

double get_max(const Mat &m)
{
    double val;
    minMaxLoc(m, NULL, &val, NULL, NULL);
    return val;
}

double get_min(const Mat &m)
{
    double val;
    minMaxLoc(m, &val, NULL, NULL, NULL);
    return val;
}

Mat bgr2hsv(const Mat &img)
{
    Mat hsv_img;
    cvtColor(img, hsv_img, COLOR_BGR2HSV);
    std::vector<Mat> hsv_img_channels;
    split(hsv_img, hsv_img_channels);
    hsv_img_channels.at(0).convertTo(hsv_img_channels.at(0), CV_8UC1, 255.0 / 180.0);
    merge(hsv_img_channels, hsv_img);
    return hsv_img;
}

} //cv namespace
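A hedged usage sketch of the HOG helper above; the image path and the cell size of 4 are placeholders for illustration, not part of the patch.

// Illustrative only: 32-channel HOG features for a BGR patch with 4x4 cells.
#include <opencv2/imgcodecs.hpp>
#include <vector>
#include "tracker_csrt_utils.hpp"   // private module header, assumed visible

static void hog_example()
{
    cv::Mat patch = cv::imread("patch.png");                   // placeholder CV_8UC3 input
    std::vector<cv::Mat> hog = cv::get_features_hog(patch, 4);
    // hog.size() == 32; each channel is (patch.rows/4) x (patch.cols/4), CV_32F
}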
54
modules/video/src/tracking/tracker_csrt_utils.hpp
Normal file
@ -0,0 +1,54 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.

#ifndef OPENCV_TRACKER_CSRT_UTILS
#define OPENCV_TRACKER_CSRT_UTILS

#include <fstream>
#include <iostream>
#include <vector>
#include <algorithm>
#include <iterator>

#include "opencv2/core/mat.hpp"

namespace cv
{

inline int modul(int a, int b)
{
    // modulo that also handles negative inputs; the result is always in [0, b)
    return ((a % b) + b) % b;
}

inline double kernel_epan(double x)
{
    return (x <= 1) ? (2.0/3.14)*(1-x) : 0;
}

Mat circshift(Mat matrix, int dx, int dy);
Mat gaussian_shaped_labels(const float sigma, const int w, const int h);
std::vector<Mat> fourier_transform_features(const std::vector<Mat> &M);
Mat divide_complex_matrices(const Mat &A, const Mat &B);
Mat get_subwindow(const Mat &image, const Point2f center,
        const int w, const int h, Rect *valid_pixels = NULL);

float subpixel_peak(const Mat &response, const std::string &s, const Point2f &p);
double get_max(const Mat &m);
double get_min(const Mat &m);

Mat get_hann_win(Size sz);
Mat get_kaiser_win(Size sz, float alpha);
Mat get_chebyshev_win(Size sz, float attenuation);

std::vector<Mat> get_features_rgb(const Mat &patch, const Size &output_size);
std::vector<Mat> get_features_hog(const Mat &im, const int bin_size);
// std::vector<Mat> get_features_cn(const Mat &im, const Size &output_size);

Mat bgr2hsv(const Mat &img);

} //cv namespace

#endif
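A quick illustration of why modul is defined this way (values made up): plain C++ gives -1 % 10 == -1, whereas modul(-1, 10) == 9 and modul(12, 10) == 2, so circshift and subpixel_peak always receive a valid, wrapped index even when the shifted coordinate falls outside the matrix.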
@ -38,12 +38,24 @@ TEST_P(DistanceAndOverlap, MIL)
    test.run(numFramesLimit);
}

TEST_P(DistanceAndOverlap, CSRT)
{
    TrackerTest<Tracker, Rect> test(TrackerCSRT::create(), dataset, 22, .7f, NoTransform);
    test.run();
}

TEST_P(DistanceAndOverlap, Shifted_Data_MIL)
{
    TrackerTest<Tracker, Rect> test(TrackerMIL::create(), dataset, 30, .6f, CenterShiftLeft);
    test.run(numFramesLimit);
}

TEST_P(DistanceAndOverlap, Shifted_Data_CSRT)
{
    TrackerTest<Tracker, Rect> test(TrackerCSRT::create(), dataset, 13, .69f, CenterShiftLeft);
    test.run();
}

/***************************************************************************************/
// Tests with scaled initial window

@ -53,6 +65,12 @@ TEST_P(DistanceAndOverlap, Scaled_Data_MIL)
    test.run(numFramesLimit);
}

TEST_P(DistanceAndOverlap, Scaled_Data_CSRT)
{
    TrackerTest<Tracker, Rect> test(TrackerCSRT::create(), dataset, 22, 0.69f, Scale_1_1, 1);
    test.run();
}

TEST_P(DistanceAndOverlap, GOTURN)
{
    std::string model = cvtest::findDataFile("dnn/gsoc2016-goturn/goturn.prototxt");
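For context, a minimal usage sketch of the tracker exercised by these tests. The video path, the initial box and the exact include location are assumptions for illustration, not part of this patch.

#include <opencv2/core.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/videoio.hpp>
#include <opencv2/video/tracking.hpp>   // assumed header for TrackerCSRT after the move

int main()
{
    cv::VideoCapture cap("video.mp4");            // placeholder input sequence
    cv::Mat frame;
    cap >> frame;

    cv::TrackerCSRT::Params params;               // defaults are reasonable
    cv::Ptr<cv::TrackerCSRT> tracker = cv::TrackerCSRT::create(params);

    cv::Rect bbox(100, 100, 80, 60);              // placeholder initial box
    tracker->init(frame, bbox);

    while (cap.read(frame))
    {
        if (tracker->update(frame, bbox))         // returns false once the target is lost
            cv::rectangle(frame, bbox, cv::Scalar(0, 255, 0), 2);
    }
    return 0;
}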