opencv/modules/video/src/tracking/tracker_csrt_segmentation.cpp

// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.

#include "../precomp.hpp"

#include "tracker_csrt_segmentation.hpp"

#include <fstream>
#include <iostream>
#include <vector>
#include <iostream>

//-------------------- HISTOGRAM CLASS --------------------
namespace cv
{

Histogram::Histogram(int numDimensions, int numBinsPerDimension)
{
    m_numBinsPerDim = numBinsPerDimension;
    m_numDim = numDimensions;
    p_size = cvFloor(std::pow(m_numBinsPerDim, m_numDim));
    p_bins.resize(p_size, 0);
    p_dimIdCoef.resize(m_numDim, 1);
    for (int i = 0; i < m_numDim-1; ++i)
        p_dimIdCoef[i] = static_cast<int>(std::pow(numBinsPerDimension, m_numDim - 1 - i));

}

void Histogram::extractForegroundHistogram(std::vector<cv::Mat> & imgChannels,
        cv::Mat weights, bool useMatWeights, int x1, int y1, int x2, int y2)
{
    //just for code clarity
    cv::Mat & img = imgChannels[0];

    if (!useMatWeights){
        //weights are epanechnikov distr. with peek at the center of the image;
        double cx = x1 + (x2-x1)/2.;
        double cy = y1 + (y2-y1)/2.;
        double kernelSize_width = 1.0/(0.5*static_cast<double>(x2-x1)*1.4142+1);  //sqrt(2)
        double kernelSize_height = 1.0/(0.5*static_cast<double>(y2-y1)*1.4142+1);

        cv::Mat kernelWeight(img.rows, img.cols, CV_64FC1);
        for (int y = y1; y < y2+1; ++y){
            double * weightPtr = kernelWeight.ptr<double>(y);
            double tmp_y = std::pow((cy-y)*kernelSize_height, 2);
            for (int x = x1; x < x2+1; ++x){
                weightPtr[x] = kernelProfile_Epanechnikov(std::pow((cx-x)*kernelSize_width,2) + tmp_y);
            }
        }
        weights = kernelWeight;
    }
    //extract pixel values and compute histogram
    double rangePerBinInverse = static_cast<double>(m_numBinsPerDim)/256.0;  // 1 / (imgRange/numBinsPerDim)
    double sum = 0;
    for (int y = y1; y < y2+1; ++y){
        std::vector<const uchar *> dataPtr(m_numDim);
        for (int dim = 0; dim < m_numDim; ++dim)
            dataPtr[dim] = imgChannels[dim].ptr<uchar>(y);
        const double * weightPtr = weights.ptr<double>(y);

        for (int x = x1; x < x2+1; ++x){
            int id = 0;
            for (int dim = 0; dim < m_numDim; ++dim){
                id += p_dimIdCoef[dim]*cvFloor(rangePerBinInverse*dataPtr[dim][x]);
            }
            p_bins[id] += weightPtr[x];
            sum += weightPtr[x];
        }
    }
    //normalize
    sum = 1./sum;
    for(int i = 0; i < p_size; ++i)
        p_bins[i] *= sum;
}

void Histogram::extractBackGroundHistogram(
        std::vector<cv::Mat> & imgChannels,
        int x1, int y1, int x2, int y2,
        int outer_x1, int outer_y1, int outer_x2, int outer_y2)
{
    //extract pixel values and compute histogram
    double rangePerBinInverse = static_cast<double>(m_numBinsPerDim)/256.0;  // 1 / (imgRange/numBinsPerDim)
    double sum = 0;
    for (int y = outer_y1; y < outer_y2; ++y){

        std::vector<const uchar *> dataPtr(m_numDim);
        for (int dim = 0; dim < m_numDim; ++dim)
            dataPtr[dim] = imgChannels[dim].ptr<uchar>(y);

        for (int x = outer_x1; x < outer_x2; ++x){
            if (x >= x1 && x <= x2 && y >= y1 && y <= y2)
                continue;

            int id = 0;
            for (int dim = 0; dim < m_numDim; ++dim){
                id += p_dimIdCoef[dim]*cvFloor(rangePerBinInverse*dataPtr[dim][x]);
            }
            p_bins[id] += 1.0;
            sum += 1.0;
        }
    }
    //normalize
    sum = 1./sum;
    for(int i = 0; i < p_size; ++i)
        p_bins[i] *= sum;
}

cv::Mat Histogram::backProject(std::vector<cv::Mat> & imgChannels)
{
    //just for code clarity
    cv::Mat & img = imgChannels[0];

    cv::Mat backProject(img.rows, img.cols, CV_64FC1);
    double rangePerBinInverse = static_cast<double>(m_numBinsPerDim)/256.0;  // 1 / (imgRange/numBinsPerDim)

    for (int y = 0; y < img.rows; ++y){
        double * backProjectPtr = backProject.ptr<double>(y);
        std::vector<const uchar *> dataPtr(m_numDim);
        for (int dim = 0; dim < m_numDim; ++dim)
            dataPtr[dim] = imgChannels[dim].ptr<uchar>(y);

        for (int x = 0; x < img.cols; ++x){
            int id = 0;
            for (int dim = 0; dim < m_numDim; ++dim){
                id += p_dimIdCoef[dim]*cvFloor(rangePerBinInverse*dataPtr[dim][x]);
            }
            backProjectPtr[x] = p_bins[id];
        }
    }
    return backProject;
}

// add new methods
std::vector<double> Histogram::getHistogramVector() {
    return p_bins;
}

void Histogram::setHistogramVector(double *vector) {
    for (size_t i=0; i<p_bins.size(); i++) {
        p_bins[i] = vector[i];
    }
}

//-------------------- SEGMENT CLASS --------------------
std::pair<cv::Mat, cv::Mat> Segment::computePosteriors(
        std::vector<cv::Mat> &imgChannels,
        int x1, int y1, int x2, int y2,
        cv::Mat weights, cv::Mat fgPrior, cv::Mat bgPrior,
        const Histogram &fgHistPrior, int numBinsPerChannel)
{
    //preprocess and normalize all data
    CV_Assert(imgChannels.size() > 0);

    //fit target to the image
    x1 = std::min(std::max(x1, 0), imgChannels[0].cols-1);
    y1 = std::min(std::max(y1, 0), imgChannels[0].rows-1);
    x2 = std::max(std::min(x2, imgChannels[0].cols-1), 0);
    y2 = std::max(std::min(y2, imgChannels[0].rows-1), 0);

    //enlarge bbox by 1/3 of its size for background area
    int offsetX = (x2-x1)/3;
    int offsetY = (y2-y1)/3;
    int outer_y1 = std::max(0, (int)(y1-offsetY));
    int outer_y2 = std::min(imgChannels[0].rows, (int)(y2+offsetY+1));
    int outer_x1 = std::max(0, (int)(x1-offsetX));
    int outer_x2 = std::min(imgChannels[0].cols, (int)(x2+offsetX+1));

    //extract histogram from original data -> more pixels better representation of distr. by histograms
    Histogram hist_target =
        (fgHistPrior.m_numBinsPerDim == numBinsPerChannel && (size_t)fgHistPrior.m_numDim == imgChannels.size())
        ? fgHistPrior : Histogram(static_cast<int>(imgChannels.size()), numBinsPerChannel);
    Histogram hist_background(static_cast<int>(imgChannels.size()), numBinsPerChannel);
    if (weights.cols == 0)
        hist_target.extractForegroundHistogram(imgChannels, cv::Mat(), false, x1, y1, x2, y2);
    else
        hist_target.extractForegroundHistogram(imgChannels, weights, true, x1, y1, x2, y2);
    hist_background.extractBackGroundHistogram(imgChannels, x1, y1, x2, y2,
            outer_x1, outer_y1, outer_x2, outer_y2);

    //compute resize factor so that the max area is 1000 (=avg. size ~ 32x32)
    double factor = sqrt(1000.0/((x2-x1)*(y2-y1)));
    if (factor > 1)
        factor = 1.0;
    cv::Size newSize(cvFloor((x2-x1)*factor), cvFloor((y2-y1)*factor));

    //rescale input data
    cv::Rect roiRect_inner = cv::Rect(x1, y1, x2-x1, y2-y1);
    std::vector<cv::Mat> imgChannelsROI_inner(imgChannels.size());
    for (size_t i = 0; i < imgChannels.size(); ++i)
        cv::resize(imgChannels[i](roiRect_inner), imgChannelsROI_inner[i], newSize);

    //initialize priors if there is no external source and rescale
    cv::Mat fgPriorScaled;
    if (fgPrior.cols == 0)
        fgPriorScaled = 0.5*cv::Mat::ones(newSize, CV_64FC1);
    else
        cv::resize(fgPrior(roiRect_inner), fgPriorScaled, newSize);
    cv::Mat bgPriorScaled;
    if (bgPrior.cols == 0)
        bgPriorScaled = 0.5*cv::Mat::ones(newSize, CV_64FC1);
    else
        cv::resize(bgPrior(roiRect_inner), bgPriorScaled, newSize);

    //backproject pixels likelihood
    cv::Mat foregroundLikelihood = hist_target.backProject(imgChannelsROI_inner).mul(fgPriorScaled);
    cv::Mat backgroundLikelihood = hist_background.backProject(imgChannelsROI_inner).mul(bgPriorScaled);

    double p_b = std::sqrt((std::pow(outer_x2-outer_x1, 2) + std::pow(outer_y2-outer_y1, 2)) /
            (std::pow(x2-x1, 2) + std::pow(y2-y1, 2))) ;
    double p_o = 1./(p_b + 1);

    //convert likelihoods to posterior prob. (Bayes rule)
    cv::Mat prob_o(newSize, foregroundLikelihood.type());
    prob_o = p_o*foregroundLikelihood / (p_o*foregroundLikelihood + p_b*backgroundLikelihood);
    cv::Mat prob_b = 1.0 - prob_o;

    std::pair<cv::Mat, cv::Mat> sizedProbs = getRegularizedSegmentation(prob_o, prob_b, fgPriorScaled, bgPriorScaled);

    //resize probs to original size
    std::pair<cv::Mat, cv::Mat> probs;
    cv::resize(sizedProbs.first, probs.first, cv::Size(roiRect_inner.width, roiRect_inner.height));
    cv::resize(sizedProbs.second, probs.second, cv::Size(roiRect_inner.width, roiRect_inner.height));

    return probs;
}

std::pair<cv::Mat, cv::Mat> Segment::computePosteriors2(
    std::vector<cv::Mat> &imgChannels, int x1, int y1, int x2, int y2, double p_b,
    cv::Mat fgPrior, cv::Mat bgPrior, Histogram hist_target, Histogram hist_background)
{
    //preprocess and normalize all data
    CV_Assert(imgChannels.size() > 0);

    //fit target to the image
    x1 = std::min(std::max(x1, 0), imgChannels[0].cols-1);
    y1 = std::min(std::max(y1, 0), imgChannels[0].rows-1);
    x2 = std::max(std::min(x2, imgChannels[0].cols-1), 0);
    y2 = std::max(std::min(y2, imgChannels[0].rows-1), 0);

    // calculate width and height of the region
    int w = x2 - x1 + 1;
    int h = y2 - y1 + 1;
    w = std::min(std::max(w, 1), imgChannels[0].cols);
    h = std::min(std::max(h, 1), imgChannels[0].rows);

    //double p_o = 1./(p_b + 1);
    double p_o = 1. - p_b;

    //compute resize factor so that the max area is 1000 (=avg. size ~ 32x32)
    double factor = sqrt(1000.0/(w*h));
    if (factor > 1)
        factor = 1.0;
    cv::Size newSize(cvFloor(w*factor), cvFloor(h*factor));

    //rescale input data
    cv::Rect roiRect_inner = cv::Rect(x1, y1, w, h);
    std::vector<cv::Mat> imgChannelsROI_inner(imgChannels.size());
    for (size_t i = 0; i < imgChannels.size(); ++i)
        cv::resize(imgChannels[i](roiRect_inner), imgChannelsROI_inner[i], newSize);

    //initialize priors if there is no external source and rescale
    cv::Mat fgPriorScaled;
    if (fgPrior.cols == 0)
        fgPriorScaled = 0.5*cv::Mat::ones(newSize, CV_64FC1);
    else
        cv::resize(fgPrior(roiRect_inner), fgPriorScaled, newSize);
    cv::Mat bgPriorScaled;
    if (bgPrior.cols == 0)
        bgPriorScaled = 0.5*cv::Mat::ones(newSize, CV_64FC1);
    else
        cv::resize(bgPrior(roiRect_inner), bgPriorScaled, newSize);

    //backproject pixels likelihood
    cv::Mat foregroundLikelihood = hist_target.backProject(imgChannelsROI_inner).mul(fgPriorScaled);
    cv::Mat backgroundLikelihood = hist_background.backProject(imgChannelsROI_inner).mul(bgPriorScaled);

    //convert likelihoods to posterior prob. (Bayes rule)
    cv::Mat prob_o(newSize, foregroundLikelihood.type());
    prob_o = p_o*foregroundLikelihood / (p_o*foregroundLikelihood + p_b*backgroundLikelihood);
    cv::Mat prob_b = 1.0 - prob_o;

    std::pair<cv::Mat, cv::Mat> sizedProbs = getRegularizedSegmentation(prob_o, prob_b,
            fgPriorScaled, bgPriorScaled);
    //std::pair<cv::Mat, cv::Mat> sizedProbs = std::pair<cv::Mat, cv::Mat>(prob_o, prob_b);

    //resize probs to original size
    std::pair<cv::Mat, cv::Mat> probs;
    cv::resize(sizedProbs.first, probs.first, cv::Size(roiRect_inner.width, roiRect_inner.height));
    cv::resize(sizedProbs.second, probs.second, cv::Size(roiRect_inner.width, roiRect_inner.height));

    return probs;
}

std::pair<cv::Mat, cv::Mat> Segment::computePosteriors2(std::vector<cv::Mat> &imgChannels,
        cv::Mat fgPrior, cv::Mat bgPrior, Histogram hist_target, Histogram hist_background)
{
    //preprocess and normalize all data
    CV_Assert(imgChannels.size() > 0);

    //fit target to the image
    int x1 = 0;
    int y1 = 0;
    int x2 = imgChannels[0].cols-1;
    int y2 = imgChannels[0].rows-1;

    //compute resize factor so that we control the max area ~32^2
    double factor = sqrt(1000./((x2-x1)*(y2-y1)));
    //double factor = 1;
    if (factor > 1)
        factor = 1.0;
    cv::Size newSize(cvFloor((x2-x1)*factor), cvFloor((y2-y1)*factor));

    //rescale input data
    cv::Rect roiRect_inner = cv::Rect(x1, y1, x2-x1+1, y2-y1+1);
    std::vector<cv::Mat> imgChannelsROI_inner(imgChannels.size());
    for (size_t i = 0; i < imgChannels.size(); ++i)
        cv::resize(imgChannels[i](roiRect_inner), imgChannelsROI_inner[i], newSize);

    //initialize priors if there is no external source and rescale
    cv::Mat fgPriorScaled;
    if (fgPrior.cols == 0)
        fgPriorScaled = 0.5*cv::Mat::ones(newSize, CV_64FC1);
    else
        cv::resize(fgPrior(roiRect_inner), fgPriorScaled, newSize);

    cv::Mat bgPriorScaled;
    if (bgPrior.cols == 0)
        bgPriorScaled = 0.5*cv::Mat::ones(newSize, CV_64FC1);
    else
        cv::resize(bgPrior(roiRect_inner), bgPriorScaled, newSize);

    //backproject pixels likelihood
    cv::Mat foregroundLikelihood = hist_target.backProject(imgChannelsROI_inner).mul(fgPriorScaled);
    cv::Mat backgroundLikelihood = hist_background.backProject(imgChannelsROI_inner).mul(bgPriorScaled);

    //prior for posterior, relative to the number of pixels in bg and fg
    double p_b = 5./3.;
    double p_o = 1./(p_b + 1);

    //convert likelihoods to posterior prob. (Bayes rule)
    cv::Mat prob_o(newSize, foregroundLikelihood.type());
    prob_o = p_o*foregroundLikelihood / (p_o*foregroundLikelihood + p_b*backgroundLikelihood);
    cv::Mat prob_b = 1.0 - prob_o;

    std::pair<cv::Mat, cv::Mat> sizedProbs = getRegularizedSegmentation(prob_o, prob_b, fgPriorScaled, bgPriorScaled);

    //resize probs to original size
    std::pair<cv::Mat, cv::Mat> probs;
    cv::resize(sizedProbs.first, probs.first, cv::Size(roiRect_inner.width, roiRect_inner.height));
    cv::resize(sizedProbs.second, probs.second, cv::Size(roiRect_inner.width, roiRect_inner.height));

    return probs;
}

std::pair<cv::Mat, cv::Mat> Segment::getRegularizedSegmentation(
        cv::Mat &prob_o, cv::Mat &prob_b, cv::Mat & prior_o, cv::Mat & prior_b)
{
    int hsize = cvFloor(std::max(1.0, (double)cvFloor(static_cast<double>(prob_b.cols)*3./50. + 0.5)));
    int lambdaSize = hsize*2+1;

    //compute gaussian kernel
    cv::Mat lambda(lambdaSize, lambdaSize, CV_64FC1);
    double std2 = std::pow(hsize/3.0, 2);
    double sumLambda = 0.0;
    for (int y = -hsize; y < hsize + 1; ++y){
        double * lambdaPtr = lambda.ptr<double>(y+hsize);
        double tmp_y = y*y;
        for (int x = -hsize; x < hsize +1; ++x){
            double tmp_gauss = gaussian(x*x, tmp_y, std2);
            lambdaPtr[x+hsize] = tmp_gauss;
            sumLambda += tmp_gauss;
        }
    }
    sumLambda -= lambda.at<double>(hsize, hsize);
    //set center of kernel to 0
    lambda.at<double>(hsize, hsize) = 0.0;
    sumLambda = 1.0/sumLambda;
    //normalize kernel to sum to 1
    lambda = lambda*sumLambda;

    //create lambda2 kernel
    cv::Mat lambda2 = lambda.clone();
    lambda2.at<double>(hsize, hsize) = 1.0;

    double terminateThr = 1e-1;
    double logLike = std::numeric_limits<double>::max();
    int maxIter = 50;

    //return values
    cv::Mat Qsum_o(prior_o.rows, prior_o.cols, prior_o.type());
    cv::Mat Qsum_b(prior_o.rows, prior_o.cols, prior_o.type());

    //algorithm temporal
    cv::Mat Si_o(prior_o.rows, prior_o.cols, prior_o.type());
    cv::Mat Si_b(prior_o.rows, prior_o.cols, prior_o.type());
    cv::Mat Ssum_o(prior_o.rows, prior_o.cols, prior_o.type());
    cv::Mat Ssum_b(prior_o.rows, prior_o.cols, prior_o.type());
    cv::Mat Qi_o(prior_o.rows, prior_o.cols, prior_o.type());
    cv::Mat Qi_b(prior_o.rows, prior_o.cols, prior_o.type());
    cv::Mat logQo(prior_o.rows, prior_o.cols, prior_o.type());
    cv::Mat logQb(prior_o.rows, prior_o.cols, prior_o.type());

    int i;
    for (i = 0; i < maxIter; ++i){
        //follows the equations from Kristan et al. ACCV2014 paper
        //"A graphical model for rapid obstacle image-map estimation from unmanned surface vehicles"
        cv::Mat P_Io = prior_o.mul(prob_o) + std::numeric_limits<double>::epsilon();
        cv::Mat P_Ib = prior_b.mul(prob_b) + std::numeric_limits<double>::epsilon();

        cv::filter2D(prior_o, Si_o, -1, lambda, cv::Point(-1, -1), 0, cv::BORDER_REFLECT);
        cv::filter2D(prior_b, Si_b, -1, lambda, cv::Point(-1, -1), 0, cv::BORDER_REFLECT);
        Si_o = Si_o.mul(prior_o);
        Si_b = Si_b.mul(prior_b);
        cv::Mat normSi = 1.0/(Si_o + Si_b);
        Si_o = Si_o.mul(normSi);
        Si_b = Si_b.mul(normSi);
        cv::filter2D(Si_o, Ssum_o, -1, lambda2, cv::Point(-1, -1), 0, cv::BORDER_REFLECT);
        cv::filter2D(Si_b, Ssum_b, -1, lambda2, cv::Point(-1, -1), 0, cv::BORDER_REFLECT);

        cv::filter2D(P_Io, Qi_o, -1, lambda, cv::Point(-1, -1), 0, cv::BORDER_REFLECT);
        cv::filter2D(P_Ib, Qi_b, -1, lambda, cv::Point(-1, -1), 0, cv::BORDER_REFLECT);
        Qi_o = Qi_o.mul(P_Io);
        Qi_b = Qi_b.mul(P_Ib);
        cv::Mat normQi = 1.0/(Qi_o + Qi_b);
        Qi_o = Qi_o.mul(normQi);
        Qi_b = Qi_b.mul(normQi);
        cv::filter2D(Qi_o, Qsum_o, -1, lambda2, cv::Point(-1, -1), 0, cv::BORDER_REFLECT);
        cv::filter2D(Qi_b, Qsum_b, -1, lambda2, cv::Point(-1, -1), 0, cv::BORDER_REFLECT);

        prior_o = (Qsum_o + Ssum_o)*0.25;
        prior_b = (Qsum_b + Ssum_b)*0.25;
        cv::Mat normPI = 1.0/(prior_o + prior_b);
        prior_o = prior_o.mul(normPI);
        prior_b = prior_b.mul(normPI);

        //converge ?
        cv::log(Qsum_o, logQo);
        cv::log(Qsum_b, logQb);
        cv::Scalar mean = cv::sum(logQo+logQb);
        double logLikeNew = -mean.val[0]/(2*Qsum_o.rows*Qsum_o.cols);
        if (std::abs(logLike - logLikeNew) < terminateThr)
            break;
        logLike = logLikeNew;
    }
    return std::pair<cv::Mat, cv::Mat>(Qsum_o, Qsum_b);
}

} //cv namespace
//---------------------------------------------------------------------------------------------------------------------