mirror of
https://github.com/opencv/opencv.git
synced 2025-01-18 14:13:15 +08:00
refactor cudaoptflow public API:
* use opaque algorithm interfaces * add stream support
This commit is contained in:
parent
19c6bbe7d9
commit
381216aa54
@ -61,49 +61,94 @@ namespace cv { namespace cuda {
|
||||
//! @addtogroup cudaoptflow
|
||||
//! @{
|
||||
|
||||
/** @brief Class computing the optical flow for two images using Brox et al Optical Flow algorithm
|
||||
(@cite Brox2004). :
|
||||
//
|
||||
// Interface
|
||||
//
|
||||
|
||||
/** @brief Base interface for dense optical flow algorithms.
|
||||
*/
|
||||
class CV_EXPORTS BroxOpticalFlow
|
||||
class CV_EXPORTS DenseOpticalFlow : public Algorithm
|
||||
{
|
||||
public:
|
||||
BroxOpticalFlow(float alpha_, float gamma_, float scale_factor_, int inner_iterations_, int outer_iterations_, int solver_iterations_) :
|
||||
alpha(alpha_), gamma(gamma_), scale_factor(scale_factor_),
|
||||
inner_iterations(inner_iterations_), outer_iterations(outer_iterations_), solver_iterations(solver_iterations_)
|
||||
{
|
||||
}
|
||||
/** @brief Calculates a dense optical flow.
|
||||
|
||||
//! Compute optical flow
|
||||
//! frame0 - source frame (supports only CV_32FC1 type)
|
||||
//! frame1 - frame to track (with the same size and type as frame0)
|
||||
//! u - flow horizontal component (along x axis)
|
||||
//! v - flow vertical component (along y axis)
|
||||
void operator ()(const GpuMat& frame0, const GpuMat& frame1, GpuMat& u, GpuMat& v, Stream& stream = Stream::Null());
|
||||
|
||||
//! flow smoothness
|
||||
float alpha;
|
||||
|
||||
//! gradient constancy importance
|
||||
float gamma;
|
||||
|
||||
//! pyramid scale factor
|
||||
float scale_factor;
|
||||
|
||||
//! number of lagged non-linearity iterations (inner loop)
|
||||
int inner_iterations;
|
||||
|
||||
//! number of warping iterations (number of pyramid levels)
|
||||
int outer_iterations;
|
||||
|
||||
//! number of linear system solver iterations
|
||||
int solver_iterations;
|
||||
|
||||
GpuMat buf;
|
||||
@param I0 first input image.
|
||||
@param I1 second input image of the same size and the same type as I0.
|
||||
@param flow computed flow image that has the same size as I0 and type CV_32FC2.
|
||||
@param stream Stream for the asynchronous version.
|
||||
*/
|
||||
virtual void calc(InputArray I0, InputArray I1, InputOutputArray flow, Stream& stream = Stream::Null()) = 0;
|
||||
};
|
||||
|
||||
/** @brief Class used for calculating an optical flow.
|
||||
/** @brief Base interface for sparse optical flow algorithms.
|
||||
*/
|
||||
class CV_EXPORTS SparseOpticalFlow : public Algorithm
|
||||
{
|
||||
public:
|
||||
/** @brief Calculates a sparse optical flow.
|
||||
|
||||
The class can calculate an optical flow for a sparse feature set or dense optical flow using the
|
||||
@param prevImg First input image.
|
||||
@param nextImg Second input image of the same size and the same type as prevImg.
|
||||
@param prevPts Vector of 2D points for which the flow needs to be found.
|
||||
@param nextPts Output vector of 2D points containing the calculated new positions of input features in the second image.
|
||||
@param status Output status vector. Each element of the vector is set to 1 if the
|
||||
flow for the corresponding features has been found. Otherwise, it is set to 0.
|
||||
@param err Optional output vector that contains error response for each point (inverse confidence).
|
||||
@param stream Stream for the asynchronous version.
|
||||
*/
|
||||
virtual void calc(InputArray prevImg, InputArray nextImg,
|
||||
InputArray prevPts, InputOutputArray nextPts,
|
||||
OutputArray status,
|
||||
OutputArray err = cv::noArray(),
|
||||
Stream& stream = Stream::Null()) = 0;
|
||||
};
|
||||
|
||||
//
|
||||
// BroxOpticalFlow
|
||||
//
|
||||
|
||||
/** @brief Class computing the optical flow for two images using Brox et al Optical Flow algorithm (@cite Brox2004).
|
||||
*/
|
||||
class CV_EXPORTS BroxOpticalFlow : public DenseOpticalFlow
|
||||
{
|
||||
public:
|
||||
virtual double getFlowSmoothness() const = 0;
|
||||
virtual void setFlowSmoothness(double alpha) = 0;
|
||||
|
||||
virtual double getGradientConstancyImportance() const = 0;
|
||||
virtual void setGradientConstancyImportance(double gamma) = 0;
|
||||
|
||||
virtual double getPyramidScaleFactor() const = 0;
|
||||
virtual void setPyramidScaleFactor(double scale_factor) = 0;
|
||||
|
||||
//! number of lagged non-linearity iterations (inner loop)
|
||||
virtual int getInnerIterations() const = 0;
|
||||
virtual void setInnerIterations(int inner_iterations) = 0;
|
||||
|
||||
//! number of warping iterations (number of pyramid levels)
|
||||
virtual int getOuterIterations() const = 0;
|
||||
virtual void setOuterIterations(int outer_iterations) = 0;
|
||||
|
||||
//! number of linear system solver iterations
|
||||
virtual int getSolverIterations() const = 0;
|
||||
virtual void setSolverIterations(int solver_iterations) = 0;
|
||||
|
||||
static Ptr<BroxOpticalFlow> create(
|
||||
double alpha = 0.197,
|
||||
double gamma = 50.0,
|
||||
double scale_factor = 0.8,
|
||||
int inner_iterations = 5,
|
||||
int outer_iterations = 150,
|
||||
int solver_iterations = 10);
|
||||
};
|
||||
|
||||
//
|
||||
// PyrLKOpticalFlow
|
||||
//
|
||||
|
||||
/** @brief Class used for calculating a sparse optical flow.
|
||||
|
||||
The class can calculate an optical flow for a sparse feature set using the
|
||||
iterative Lucas-Kanade method with pyramids.
|
||||
|
||||
@sa calcOpticalFlowPyrLK
|
||||
@ -112,158 +157,116 @@ iterative Lucas-Kanade method with pyramids.
|
||||
- An example of the Lucas Kanade optical flow algorithm can be found at
|
||||
opencv_source_code/samples/gpu/pyrlk_optical_flow.cpp
|
||||
*/
|
||||
class CV_EXPORTS PyrLKOpticalFlow
|
||||
class CV_EXPORTS SparsePyrLKOpticalFlow : public SparseOpticalFlow
|
||||
{
|
||||
public:
|
||||
PyrLKOpticalFlow();
|
||||
virtual Size getWinSize() const = 0;
|
||||
virtual void setWinSize(Size winSize) = 0;
|
||||
|
||||
/** @brief Calculate an optical flow for a sparse feature set.
|
||||
virtual int getMaxLevel() const = 0;
|
||||
virtual void setMaxLevel(int maxLevel) = 0;
|
||||
|
||||
@param prevImg First 8-bit input image (supports both grayscale and color images).
|
||||
@param nextImg Second input image of the same size and the same type as prevImg .
|
||||
@param prevPts Vector of 2D points for which the flow needs to be found. It must be one row matrix
|
||||
with CV_32FC2 type.
|
||||
@param nextPts Output vector of 2D points (with single-precision floating-point coordinates)
|
||||
containing the calculated new positions of input features in the second image. When useInitialFlow
|
||||
is true, the vector must have the same size as in the input.
|
||||
@param status Output status vector (CV_8UC1 type). Each element of the vector is set to 1 if the
|
||||
flow for the corresponding features has been found. Otherwise, it is set to 0.
|
||||
@param err Output vector (CV_32FC1 type) that contains the difference between patches around the
|
||||
original and moved points or min eigen value if getMinEigenVals is checked. It can be NULL, if not
|
||||
needed.
|
||||
virtual int getNumIters() const = 0;
|
||||
virtual void setNumIters(int iters) = 0;
|
||||
|
||||
@sa calcOpticalFlowPyrLK
|
||||
*/
|
||||
void sparse(const GpuMat& prevImg, const GpuMat& nextImg, const GpuMat& prevPts, GpuMat& nextPts,
|
||||
GpuMat& status, GpuMat* err = 0);
|
||||
virtual bool getUseInitialFlow() const = 0;
|
||||
virtual void setUseInitialFlow(bool useInitialFlow) = 0;
|
||||
|
||||
/** @brief Calculate dense optical flow.
|
||||
|
||||
@param prevImg First 8-bit grayscale input image.
|
||||
@param nextImg Second input image of the same size and the same type as prevImg .
|
||||
@param u Horizontal component of the optical flow of the same size as input images, 32-bit
|
||||
floating-point, single-channel
|
||||
@param v Vertical component of the optical flow of the same size as input images, 32-bit
|
||||
floating-point, single-channel
|
||||
@param err Output vector (CV_32FC1 type) that contains the difference between patches around the
|
||||
original and moved points or min eigen value if getMinEigenVals is checked. It can be NULL, if not
|
||||
needed.
|
||||
*/
|
||||
void dense(const GpuMat& prevImg, const GpuMat& nextImg, GpuMat& u, GpuMat& v, GpuMat* err = 0);
|
||||
|
||||
/** @brief Releases inner buffers memory.
|
||||
*/
|
||||
void releaseMemory();
|
||||
|
||||
Size winSize;
|
||||
int maxLevel;
|
||||
int iters;
|
||||
bool useInitialFlow;
|
||||
|
||||
private:
|
||||
std::vector<GpuMat> prevPyr_;
|
||||
std::vector<GpuMat> nextPyr_;
|
||||
|
||||
GpuMat buf_;
|
||||
|
||||
GpuMat uPyr_[2];
|
||||
GpuMat vPyr_[2];
|
||||
static Ptr<SparsePyrLKOpticalFlow> create(
|
||||
Size winSize = Size(21, 21),
|
||||
int maxLevel = 3,
|
||||
int iters = 30,
|
||||
bool useInitialFlow = false);
|
||||
};
|
||||
|
||||
/** @brief Class computing a dense optical flow using the Gunnar Farneback’s algorithm. :
|
||||
/** @brief Class used for calculating a dense optical flow.
|
||||
|
||||
The class can calculate an optical flow for a dense optical flow using the
|
||||
iterative Lucas-Kanade method with pyramids.
|
||||
*/
|
||||
class CV_EXPORTS FarnebackOpticalFlow
|
||||
class CV_EXPORTS DensePyrLKOpticalFlow : public DenseOpticalFlow
|
||||
{
|
||||
public:
|
||||
FarnebackOpticalFlow()
|
||||
{
|
||||
numLevels = 5;
|
||||
pyrScale = 0.5;
|
||||
fastPyramids = false;
|
||||
winSize = 13;
|
||||
numIters = 10;
|
||||
polyN = 5;
|
||||
polySigma = 1.1;
|
||||
flags = 0;
|
||||
}
|
||||
virtual Size getWinSize() const = 0;
|
||||
virtual void setWinSize(Size winSize) = 0;
|
||||
|
||||
int numLevels;
|
||||
double pyrScale;
|
||||
bool fastPyramids;
|
||||
int winSize;
|
||||
int numIters;
|
||||
int polyN;
|
||||
double polySigma;
|
||||
int flags;
|
||||
virtual int getMaxLevel() const = 0;
|
||||
virtual void setMaxLevel(int maxLevel) = 0;
|
||||
|
||||
/** @brief Computes a dense optical flow using the Gunnar Farneback’s algorithm.
|
||||
virtual int getNumIters() const = 0;
|
||||
virtual void setNumIters(int iters) = 0;
|
||||
|
||||
@param frame0 First 8-bit gray-scale input image
|
||||
@param frame1 Second 8-bit gray-scale input image
|
||||
@param flowx Flow horizontal component
|
||||
@param flowy Flow vertical component
|
||||
@param s Stream
|
||||
virtual bool getUseInitialFlow() const = 0;
|
||||
virtual void setUseInitialFlow(bool useInitialFlow) = 0;
|
||||
|
||||
@sa calcOpticalFlowFarneback
|
||||
*/
|
||||
void operator ()(const GpuMat &frame0, const GpuMat &frame1, GpuMat &flowx, GpuMat &flowy, Stream &s = Stream::Null());
|
||||
|
||||
/** @brief Releases unused auxiliary memory buffers.
|
||||
*/
|
||||
void releaseMemory()
|
||||
{
|
||||
frames_[0].release();
|
||||
frames_[1].release();
|
||||
pyrLevel_[0].release();
|
||||
pyrLevel_[1].release();
|
||||
M_.release();
|
||||
bufM_.release();
|
||||
R_[0].release();
|
||||
R_[1].release();
|
||||
blurredFrame_[0].release();
|
||||
blurredFrame_[1].release();
|
||||
pyramid0_.clear();
|
||||
pyramid1_.clear();
|
||||
}
|
||||
|
||||
private:
|
||||
void prepareGaussian(
|
||||
int n, double sigma, float *g, float *xg, float *xxg,
|
||||
double &ig11, double &ig03, double &ig33, double &ig55);
|
||||
|
||||
void setPolynomialExpansionConsts(int n, double sigma);
|
||||
|
||||
void updateFlow_boxFilter(
|
||||
const GpuMat& R0, const GpuMat& R1, GpuMat& flowx, GpuMat &flowy,
|
||||
GpuMat& M, GpuMat &bufM, int blockSize, bool updateMatrices, Stream streams[]);
|
||||
|
||||
void updateFlow_gaussianBlur(
|
||||
const GpuMat& R0, const GpuMat& R1, GpuMat& flowx, GpuMat& flowy,
|
||||
GpuMat& M, GpuMat &bufM, int blockSize, bool updateMatrices, Stream streams[]);
|
||||
|
||||
GpuMat frames_[2];
|
||||
GpuMat pyrLevel_[2], M_, bufM_, R_[2], blurredFrame_[2];
|
||||
std::vector<GpuMat> pyramid0_, pyramid1_;
|
||||
static Ptr<DensePyrLKOpticalFlow> create(
|
||||
Size winSize = Size(13, 13),
|
||||
int maxLevel = 3,
|
||||
int iters = 30,
|
||||
bool useInitialFlow = false);
|
||||
};
|
||||
|
||||
// Implementation of the Zach, Pock and Bischof Dual TV-L1 Optical Flow method
|
||||
//
|
||||
// see reference:
|
||||
// [1] C. Zach, T. Pock and H. Bischof, "A Duality Based Approach for Realtime TV-L1 Optical Flow".
|
||||
// [2] Javier Sanchez, Enric Meinhardt-Llopis and Gabriele Facciolo. "TV-L1 Optical Flow Estimation".
|
||||
class CV_EXPORTS OpticalFlowDual_TVL1_CUDA
|
||||
// FarnebackOpticalFlow
|
||||
//
|
||||
|
||||
/** @brief Class computing a dense optical flow using the Gunnar Farneback’s algorithm.
|
||||
*/
|
||||
class CV_EXPORTS FarnebackOpticalFlow : public DenseOpticalFlow
|
||||
{
|
||||
public:
|
||||
OpticalFlowDual_TVL1_CUDA();
|
||||
virtual int getNumLevels() const = 0;
|
||||
virtual void setNumLevels(int numLevels) = 0;
|
||||
|
||||
void operator ()(const GpuMat& I0, const GpuMat& I1, GpuMat& flowx, GpuMat& flowy);
|
||||
virtual double getPyrScale() const = 0;
|
||||
virtual void setPyrScale(double pyrScale) = 0;
|
||||
|
||||
void collectGarbage();
|
||||
virtual bool getFastPyramids() const = 0;
|
||||
virtual void setFastPyramids(bool fastPyramids) = 0;
|
||||
|
||||
virtual int getWinSize() const = 0;
|
||||
virtual void setWinSize(int winSize) = 0;
|
||||
|
||||
virtual int getNumIters() const = 0;
|
||||
virtual void setNumIters(int numIters) = 0;
|
||||
|
||||
virtual int getPolyN() const = 0;
|
||||
virtual void setPolyN(int polyN) = 0;
|
||||
|
||||
virtual double getPolySigma() const = 0;
|
||||
virtual void setPolySigma(double polySigma) = 0;
|
||||
|
||||
virtual int getFlags() const = 0;
|
||||
virtual void setFlags(int flags) = 0;
|
||||
|
||||
static Ptr<FarnebackOpticalFlow> create(
|
||||
int numLevels = 5,
|
||||
double pyrScale = 0.5,
|
||||
bool fastPyramids = false,
|
||||
int winSize = 13,
|
||||
int numIters = 10,
|
||||
int polyN = 5,
|
||||
double polySigma = 1.1,
|
||||
int flags = 0);
|
||||
};
|
||||
|
||||
//
|
||||
// OpticalFlowDual_TVL1
|
||||
//
|
||||
|
||||
/** @brief Implementation of the Zach, Pock and Bischof Dual TV-L1 Optical Flow method.
|
||||
*
|
||||
* @sa C. Zach, T. Pock and H. Bischof, "A Duality Based Approach for Realtime TV-L1 Optical Flow".
|
||||
* @sa Javier Sanchez, Enric Meinhardt-Llopis and Gabriele Facciolo. "TV-L1 Optical Flow Estimation".
|
||||
*/
|
||||
class CV_EXPORTS OpticalFlowDual_TVL1 : public DenseOpticalFlow
|
||||
{
|
||||
public:
|
||||
/**
|
||||
* Time step of the numerical scheme.
|
||||
*/
|
||||
double tau;
|
||||
virtual double getTau() const = 0;
|
||||
virtual void setTau(double tau) = 0;
|
||||
|
||||
/**
|
||||
* Weight parameter for the data term, attachment parameter.
|
||||
@ -271,7 +274,8 @@ public:
|
||||
* The smaller this parameter is, the smoother the solutions we obtain.
|
||||
* It depends on the range of motions of the images, so its value should be adapted to each image sequence.
|
||||
*/
|
||||
double lambda;
|
||||
virtual double getLambda() const = 0;
|
||||
virtual void setLambda(double lambda) = 0;
|
||||
|
||||
/**
|
||||
* Weight parameter for (u - v)^2, tightness parameter.
|
||||
@ -279,20 +283,23 @@ public:
|
||||
* In theory, it should have a small value in order to maintain both parts in correspondence.
|
||||
* The method is stable for a large range of values of this parameter.
|
||||
*/
|
||||
virtual double getGamma() const = 0;
|
||||
virtual void setGamma(double gamma) = 0;
|
||||
|
||||
double gamma;
|
||||
/**
|
||||
* parameter used for motion estimation. It adds a variable allowing for illumination variations
|
||||
* Set this parameter to 1. if you have varying illumination.
|
||||
* See: Chambolle et al, A First-Order Primal-Dual Algorithm for Convex Problems with Applications to Imaging
|
||||
* Journal of Mathematical imaging and vision, may 2011 Vol 40 issue 1, pp 120-145
|
||||
*/
|
||||
double theta;
|
||||
* parameter used for motion estimation. It adds a variable allowing for illumination variations
|
||||
* Set this parameter to 1. if you have varying illumination.
|
||||
* See: Chambolle et al, A First-Order Primal-Dual Algorithm for Convex Problems with Applications to Imaging
|
||||
* Journal of Mathematical imaging and vision, may 2011 Vol 40 issue 1, pp 120-145
|
||||
*/
|
||||
virtual double getTheta() const = 0;
|
||||
virtual void setTheta(double theta) = 0;
|
||||
|
||||
/**
|
||||
* Number of scales used to create the pyramid of images.
|
||||
*/
|
||||
int nscales;
|
||||
virtual int getNumScales() const = 0;
|
||||
virtual void setNumScales(int nscales) = 0;
|
||||
|
||||
/**
|
||||
* Number of warpings per scale.
|
||||
@ -300,51 +307,39 @@ public:
|
||||
* This is a parameter that assures the stability of the method.
|
||||
* It also affects the running time, so it is a compromise between speed and accuracy.
|
||||
*/
|
||||
int warps;
|
||||
virtual int getNumWarps() const = 0;
|
||||
virtual void setNumWarps(int warps) = 0;
|
||||
|
||||
/**
|
||||
* Stopping criterion threshold used in the numerical scheme, which is a trade-off between precision and running time.
|
||||
* A small value will yield more accurate solutions at the expense of a slower convergence.
|
||||
*/
|
||||
double epsilon;
|
||||
virtual double getEpsilon() const = 0;
|
||||
virtual void setEpsilon(double epsilon) = 0;
|
||||
|
||||
/**
|
||||
* Stopping criterion iterations number used in the numerical scheme.
|
||||
*/
|
||||
int iterations;
|
||||
virtual int getNumIterations() const = 0;
|
||||
virtual void setNumIterations(int iterations) = 0;
|
||||
|
||||
double scaleStep;
|
||||
virtual double getScaleStep() const = 0;
|
||||
virtual void setScaleStep(double scaleStep) = 0;
|
||||
|
||||
bool useInitialFlow;
|
||||
virtual bool getUseInitialFlow() const = 0;
|
||||
virtual void setUseInitialFlow(bool useInitialFlow) = 0;
|
||||
|
||||
private:
|
||||
void procOneScale(const GpuMat& I0, const GpuMat& I1, GpuMat& u1, GpuMat& u2, GpuMat& u3);
|
||||
|
||||
std::vector<GpuMat> I0s;
|
||||
std::vector<GpuMat> I1s;
|
||||
std::vector<GpuMat> u1s;
|
||||
std::vector<GpuMat> u2s;
|
||||
std::vector<GpuMat> u3s;
|
||||
|
||||
GpuMat I1x_buf;
|
||||
GpuMat I1y_buf;
|
||||
|
||||
GpuMat I1w_buf;
|
||||
GpuMat I1wx_buf;
|
||||
GpuMat I1wy_buf;
|
||||
|
||||
GpuMat grad_buf;
|
||||
GpuMat rho_c_buf;
|
||||
|
||||
GpuMat p11_buf;
|
||||
GpuMat p12_buf;
|
||||
GpuMat p21_buf;
|
||||
GpuMat p22_buf;
|
||||
GpuMat p31_buf;
|
||||
GpuMat p32_buf;
|
||||
|
||||
GpuMat diff_buf;
|
||||
GpuMat norm_buf;
|
||||
static Ptr<OpticalFlowDual_TVL1> create(
|
||||
double tau = 0.25,
|
||||
double lambda = 0.15,
|
||||
double theta = 0.3,
|
||||
int nscales = 5,
|
||||
int warps = 5,
|
||||
double epsilon = 0.01,
|
||||
int iterations = 300,
|
||||
double scaleStep = 0.8,
|
||||
double gamma = 0.0,
|
||||
bool useInitialFlow = false);
|
||||
};
|
||||
|
||||
//! @}
|
||||
|
@ -47,84 +47,148 @@ using namespace cv::cuda;
|
||||
|
||||
#if !defined (HAVE_CUDA) || !defined (HAVE_OPENCV_CUDALEGACY) || defined (CUDA_DISABLER)
|
||||
|
||||
void cv::cuda::BroxOpticalFlow::operator ()(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
|
||||
Ptr<BroxOpticalFlow> cv::cuda::BroxOpticalFlow::create(double, double, double, int, int, int) { throw_no_cuda(); return Ptr<BroxOpticalFlow>(); }
|
||||
|
||||
#else
|
||||
|
||||
namespace
|
||||
{
|
||||
size_t getBufSize(const NCVBroxOpticalFlowDescriptor& desc, const NCVMatrix<Ncv32f>& frame0, const NCVMatrix<Ncv32f>& frame1,
|
||||
NCVMatrix<Ncv32f>& u, NCVMatrix<Ncv32f>& v, const cudaDeviceProp& devProp)
|
||||
namespace {
|
||||
|
||||
class BroxOpticalFlowImpl : public BroxOpticalFlow
|
||||
{
|
||||
NCVMemStackAllocator gpuCounter(static_cast<Ncv32u>(devProp.textureAlignment));
|
||||
public:
|
||||
BroxOpticalFlowImpl(double alpha, double gamma, double scale_factor,
|
||||
int inner_iterations, int outer_iterations, int solver_iterations) :
|
||||
alpha_(alpha), gamma_(gamma), scale_factor_(scale_factor),
|
||||
inner_iterations_(inner_iterations), outer_iterations_(outer_iterations),
|
||||
solver_iterations_(solver_iterations)
|
||||
{
|
||||
}
|
||||
|
||||
virtual void calc(InputArray I0, InputArray I1, InputOutputArray flow, Stream& stream);
|
||||
|
||||
virtual double getFlowSmoothness() const { return alpha_; }
|
||||
virtual void setFlowSmoothness(double alpha) { alpha_ = static_cast<float>(alpha); }
|
||||
|
||||
virtual double getGradientConstancyImportance() const { return gamma_; }
|
||||
virtual void setGradientConstancyImportance(double gamma) { gamma_ = static_cast<float>(gamma); }
|
||||
|
||||
virtual double getPyramidScaleFactor() const { return scale_factor_; }
|
||||
virtual void setPyramidScaleFactor(double scale_factor) { scale_factor_ = static_cast<float>(scale_factor); }
|
||||
|
||||
//! number of lagged non-linearity iterations (inner loop)
|
||||
virtual int getInnerIterations() const { return inner_iterations_; }
|
||||
virtual void setInnerIterations(int inner_iterations) { inner_iterations_ = inner_iterations; }
|
||||
|
||||
//! number of warping iterations (number of pyramid levels)
|
||||
virtual int getOuterIterations() const { return outer_iterations_; }
|
||||
virtual void setOuterIterations(int outer_iterations) { outer_iterations_ = outer_iterations; }
|
||||
|
||||
//! number of linear system solver iterations
|
||||
virtual int getSolverIterations() const { return solver_iterations_; }
|
||||
virtual void setSolverIterations(int solver_iterations) { solver_iterations_ = solver_iterations; }
|
||||
|
||||
private:
|
||||
//! flow smoothness
|
||||
float alpha_;
|
||||
|
||||
//! gradient constancy importance
|
||||
float gamma_;
|
||||
|
||||
//! pyramid scale factor
|
||||
float scale_factor_;
|
||||
|
||||
//! number of lagged non-linearity iterations (inner loop)
|
||||
int inner_iterations_;
|
||||
|
||||
//! number of warping iterations (number of pyramid levels)
|
||||
int outer_iterations_;
|
||||
|
||||
//! number of linear system solver iterations
|
||||
int solver_iterations_;
|
||||
};
|
||||
|
||||
static size_t getBufSize(const NCVBroxOpticalFlowDescriptor& desc,
|
||||
const NCVMatrix<Ncv32f>& frame0, const NCVMatrix<Ncv32f>& frame1,
|
||||
NCVMatrix<Ncv32f>& u, NCVMatrix<Ncv32f>& v,
|
||||
size_t textureAlignment)
|
||||
{
|
||||
NCVMemStackAllocator gpuCounter(static_cast<Ncv32u>(textureAlignment));
|
||||
|
||||
ncvSafeCall( NCVBroxOpticalFlow(desc, gpuCounter, frame0, frame1, u, v, 0) );
|
||||
|
||||
return gpuCounter.maxSize();
|
||||
}
|
||||
|
||||
static void outputHandler(const String &msg)
|
||||
{
|
||||
CV_Error(cv::Error::GpuApiCallError, msg.c_str());
|
||||
}
|
||||
|
||||
void BroxOpticalFlowImpl::calc(InputArray _I0, InputArray _I1, InputOutputArray _flow, Stream& stream)
|
||||
{
|
||||
const GpuMat frame0 = _I0.getGpuMat();
|
||||
const GpuMat frame1 = _I1.getGpuMat();
|
||||
|
||||
CV_Assert( frame0.type() == CV_32FC1 );
|
||||
CV_Assert( frame1.size() == frame0.size() && frame1.type() == frame0.type() );
|
||||
|
||||
ncvSetDebugOutputHandler(outputHandler);
|
||||
|
||||
BufferPool pool(stream);
|
||||
GpuMat u = pool.getBuffer(frame0.size(), CV_32FC1);
|
||||
GpuMat v = pool.getBuffer(frame0.size(), CV_32FC1);
|
||||
|
||||
NCVBroxOpticalFlowDescriptor desc;
|
||||
desc.alpha = alpha_;
|
||||
desc.gamma = gamma_;
|
||||
desc.scale_factor = scale_factor_;
|
||||
desc.number_of_inner_iterations = inner_iterations_;
|
||||
desc.number_of_outer_iterations = outer_iterations_;
|
||||
desc.number_of_solver_iterations = solver_iterations_;
|
||||
|
||||
NCVMemSegment frame0MemSeg;
|
||||
frame0MemSeg.begin.memtype = NCVMemoryTypeDevice;
|
||||
frame0MemSeg.begin.ptr = const_cast<uchar*>(frame0.data);
|
||||
frame0MemSeg.size = frame0.step * frame0.rows;
|
||||
|
||||
NCVMemSegment frame1MemSeg;
|
||||
frame1MemSeg.begin.memtype = NCVMemoryTypeDevice;
|
||||
frame1MemSeg.begin.ptr = const_cast<uchar*>(frame1.data);
|
||||
frame1MemSeg.size = frame1.step * frame1.rows;
|
||||
|
||||
NCVMemSegment uMemSeg;
|
||||
uMemSeg.begin.memtype = NCVMemoryTypeDevice;
|
||||
uMemSeg.begin.ptr = u.ptr();
|
||||
uMemSeg.size = u.step * u.rows;
|
||||
|
||||
NCVMemSegment vMemSeg;
|
||||
vMemSeg.begin.memtype = NCVMemoryTypeDevice;
|
||||
vMemSeg.begin.ptr = v.ptr();
|
||||
vMemSeg.size = v.step * v.rows;
|
||||
|
||||
DeviceInfo devInfo;
|
||||
size_t textureAlignment = devInfo.textureAlignment();
|
||||
|
||||
NCVMatrixReuse<Ncv32f> frame0Mat(frame0MemSeg, static_cast<Ncv32u>(textureAlignment), frame0.cols, frame0.rows, static_cast<Ncv32u>(frame0.step));
|
||||
NCVMatrixReuse<Ncv32f> frame1Mat(frame1MemSeg, static_cast<Ncv32u>(textureAlignment), frame1.cols, frame1.rows, static_cast<Ncv32u>(frame1.step));
|
||||
NCVMatrixReuse<Ncv32f> uMat(uMemSeg, static_cast<Ncv32u>(textureAlignment), u.cols, u.rows, static_cast<Ncv32u>(u.step));
|
||||
NCVMatrixReuse<Ncv32f> vMat(vMemSeg, static_cast<Ncv32u>(textureAlignment), v.cols, v.rows, static_cast<Ncv32u>(v.step));
|
||||
|
||||
size_t bufSize = getBufSize(desc, frame0Mat, frame1Mat, uMat, vMat, textureAlignment);
|
||||
GpuMat buf = pool.getBuffer(1, static_cast<int>(bufSize), CV_8UC1);
|
||||
|
||||
NCVMemStackAllocator gpuAllocator(NCVMemoryTypeDevice, bufSize, static_cast<Ncv32u>(textureAlignment), buf.ptr());
|
||||
|
||||
ncvSafeCall( NCVBroxOpticalFlow(desc, gpuAllocator, frame0Mat, frame1Mat, uMat, vMat, StreamAccessor::getStream(stream)) );
|
||||
|
||||
GpuMat flows[] = {u, v};
|
||||
cuda::merge(flows, 2, _flow, stream);
|
||||
}
|
||||
}
|
||||
|
||||
namespace
|
||||
Ptr<BroxOpticalFlow> cv::cuda::BroxOpticalFlow::create(double alpha, double gamma, double scale_factor, int inner_iterations, int outer_iterations, int solver_iterations)
|
||||
{
|
||||
static void outputHandler(const String &msg) { CV_Error(cv::Error::GpuApiCallError, msg.c_str()); }
|
||||
}
|
||||
|
||||
void cv::cuda::BroxOpticalFlow::operator ()(const GpuMat& frame0, const GpuMat& frame1, GpuMat& u, GpuMat& v, Stream& s)
|
||||
{
|
||||
ncvSetDebugOutputHandler(outputHandler);
|
||||
|
||||
CV_Assert(frame0.type() == CV_32FC1);
|
||||
CV_Assert(frame1.size() == frame0.size() && frame1.type() == frame0.type());
|
||||
|
||||
u.create(frame0.size(), CV_32FC1);
|
||||
v.create(frame0.size(), CV_32FC1);
|
||||
|
||||
cudaDeviceProp devProp;
|
||||
cudaSafeCall( cudaGetDeviceProperties(&devProp, getDevice()) );
|
||||
|
||||
NCVBroxOpticalFlowDescriptor desc;
|
||||
|
||||
desc.alpha = alpha;
|
||||
desc.gamma = gamma;
|
||||
desc.scale_factor = scale_factor;
|
||||
desc.number_of_inner_iterations = inner_iterations;
|
||||
desc.number_of_outer_iterations = outer_iterations;
|
||||
desc.number_of_solver_iterations = solver_iterations;
|
||||
|
||||
NCVMemSegment frame0MemSeg;
|
||||
frame0MemSeg.begin.memtype = NCVMemoryTypeDevice;
|
||||
frame0MemSeg.begin.ptr = const_cast<uchar*>(frame0.data);
|
||||
frame0MemSeg.size = frame0.step * frame0.rows;
|
||||
|
||||
NCVMemSegment frame1MemSeg;
|
||||
frame1MemSeg.begin.memtype = NCVMemoryTypeDevice;
|
||||
frame1MemSeg.begin.ptr = const_cast<uchar*>(frame1.data);
|
||||
frame1MemSeg.size = frame1.step * frame1.rows;
|
||||
|
||||
NCVMemSegment uMemSeg;
|
||||
uMemSeg.begin.memtype = NCVMemoryTypeDevice;
|
||||
uMemSeg.begin.ptr = u.ptr();
|
||||
uMemSeg.size = u.step * u.rows;
|
||||
|
||||
NCVMemSegment vMemSeg;
|
||||
vMemSeg.begin.memtype = NCVMemoryTypeDevice;
|
||||
vMemSeg.begin.ptr = v.ptr();
|
||||
vMemSeg.size = v.step * v.rows;
|
||||
|
||||
NCVMatrixReuse<Ncv32f> frame0Mat(frame0MemSeg, static_cast<Ncv32u>(devProp.textureAlignment), frame0.cols, frame0.rows, static_cast<Ncv32u>(frame0.step));
|
||||
NCVMatrixReuse<Ncv32f> frame1Mat(frame1MemSeg, static_cast<Ncv32u>(devProp.textureAlignment), frame1.cols, frame1.rows, static_cast<Ncv32u>(frame1.step));
|
||||
NCVMatrixReuse<Ncv32f> uMat(uMemSeg, static_cast<Ncv32u>(devProp.textureAlignment), u.cols, u.rows, static_cast<Ncv32u>(u.step));
|
||||
NCVMatrixReuse<Ncv32f> vMat(vMemSeg, static_cast<Ncv32u>(devProp.textureAlignment), v.cols, v.rows, static_cast<Ncv32u>(v.step));
|
||||
|
||||
cudaStream_t stream = StreamAccessor::getStream(s);
|
||||
|
||||
size_t bufSize = getBufSize(desc, frame0Mat, frame1Mat, uMat, vMat, devProp);
|
||||
|
||||
ensureSizeIsEnough(1, static_cast<int>(bufSize), CV_8UC1, buf);
|
||||
|
||||
NCVMemStackAllocator gpuAllocator(NCVMemoryTypeDevice, bufSize, static_cast<Ncv32u>(devProp.textureAlignment), buf.ptr());
|
||||
|
||||
ncvSafeCall( NCVBroxOpticalFlow(desc, gpuAllocator, frame0Mat, frame1Mat, uMat, vMat, stream) );
|
||||
return makePtr<BroxOpticalFlowImpl>(alpha, gamma, scale_factor, inner_iterations, outer_iterations, solver_iterations);
|
||||
}
|
||||
|
||||
#endif /* HAVE_CUDA */
|
||||
|
@ -472,16 +472,16 @@ namespace pyrlk
|
||||
}
|
||||
}
|
||||
|
||||
void loadConstants(int2 winSize, int iters)
|
||||
void loadConstants(int2 winSize, int iters, cudaStream_t stream)
|
||||
{
|
||||
cudaSafeCall( cudaMemcpyToSymbol(c_winSize_x, &winSize.x, sizeof(int)) );
|
||||
cudaSafeCall( cudaMemcpyToSymbol(c_winSize_y, &winSize.y, sizeof(int)) );
|
||||
cudaSafeCall( cudaMemcpyToSymbolAsync(c_winSize_x, &winSize.x, sizeof(int), 0, cudaMemcpyHostToDevice, stream) );
|
||||
cudaSafeCall( cudaMemcpyToSymbolAsync(c_winSize_y, &winSize.y, sizeof(int), 0, cudaMemcpyHostToDevice, stream) );
|
||||
|
||||
int2 halfWin = make_int2((winSize.x - 1) / 2, (winSize.y - 1) / 2);
|
||||
cudaSafeCall( cudaMemcpyToSymbol(c_halfWin_x, &halfWin.x, sizeof(int)) );
|
||||
cudaSafeCall( cudaMemcpyToSymbol(c_halfWin_y, &halfWin.y, sizeof(int)) );
|
||||
cudaSafeCall( cudaMemcpyToSymbolAsync(c_halfWin_x, &halfWin.x, sizeof(int), 0, cudaMemcpyHostToDevice, stream) );
|
||||
cudaSafeCall( cudaMemcpyToSymbolAsync(c_halfWin_y, &halfWin.y, sizeof(int), 0, cudaMemcpyHostToDevice, stream) );
|
||||
|
||||
cudaSafeCall( cudaMemcpyToSymbol(c_iters, &iters, sizeof(int)) );
|
||||
cudaSafeCall( cudaMemcpyToSymbolAsync(c_iters, &iters, sizeof(int), 0, cudaMemcpyHostToDevice, stream) );
|
||||
}
|
||||
|
||||
void sparse1(PtrStepSzf I, PtrStepSzf J, const float2* prevPts, float2* nextPts, uchar* status, float* err, int ptcount,
|
||||
|
@ -66,15 +66,16 @@ namespace tvl1flow
|
||||
dy(y, x) = 0.5f * (src(::min(y + 1, src.rows - 1), x) - src(::max(y - 1, 0), x));
|
||||
}
|
||||
|
||||
void centeredGradient(PtrStepSzf src, PtrStepSzf dx, PtrStepSzf dy)
|
||||
void centeredGradient(PtrStepSzf src, PtrStepSzf dx, PtrStepSzf dy, cudaStream_t stream)
|
||||
{
|
||||
const dim3 block(32, 8);
|
||||
const dim3 grid(divUp(src.cols, block.x), divUp(src.rows, block.y));
|
||||
|
||||
centeredGradientKernel<<<grid, block>>>(src, dx, dy);
|
||||
centeredGradientKernel<<<grid, block, 0, stream>>>(src, dx, dy);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
if (!stream)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
}
|
||||
|
||||
@ -164,7 +165,10 @@ namespace tvl1flow
|
||||
rho(y, x) = I1wVal - I1wxVal * u1Val - I1wyVal * u2Val - I0Val;
|
||||
}
|
||||
|
||||
void warpBackward(PtrStepSzf I0, PtrStepSzf I1, PtrStepSzf I1x, PtrStepSzf I1y, PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf I1w, PtrStepSzf I1wx, PtrStepSzf I1wy, PtrStepSzf grad, PtrStepSzf rho)
|
||||
void warpBackward(PtrStepSzf I0, PtrStepSzf I1, PtrStepSzf I1x, PtrStepSzf I1y,
|
||||
PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf I1w, PtrStepSzf I1wx,
|
||||
PtrStepSzf I1wy, PtrStepSzf grad, PtrStepSzf rho,
|
||||
cudaStream_t stream)
|
||||
{
|
||||
const dim3 block(32, 8);
|
||||
const dim3 grid(divUp(I0.cols, block.x), divUp(I0.rows, block.y));
|
||||
@ -173,10 +177,11 @@ namespace tvl1flow
|
||||
bindTexture(&tex_I1x, I1x);
|
||||
bindTexture(&tex_I1y, I1y);
|
||||
|
||||
warpBackwardKernel<<<grid, block>>>(I0, u1, u2, I1w, I1wx, I1wy, grad, rho);
|
||||
warpBackwardKernel<<<grid, block, 0, stream>>>(I0, u1, u2, I1w, I1wx, I1wy, grad, rho);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
if (!stream)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
}
|
||||
|
||||
@ -292,15 +297,17 @@ namespace tvl1flow
|
||||
PtrStepSzf grad, PtrStepSzf rho_c,
|
||||
PtrStepSzf p11, PtrStepSzf p12, PtrStepSzf p21, PtrStepSzf p22, PtrStepSzf p31, PtrStepSzf p32,
|
||||
PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf u3, PtrStepSzf error,
|
||||
float l_t, float theta, float gamma, bool calcError)
|
||||
float l_t, float theta, float gamma, bool calcError,
|
||||
cudaStream_t stream)
|
||||
{
|
||||
const dim3 block(32, 8);
|
||||
const dim3 grid(divUp(I1wx.cols, block.x), divUp(I1wx.rows, block.y));
|
||||
|
||||
estimateUKernel<<<grid, block>>>(I1wx, I1wy, grad, rho_c, p11, p12, p21, p22, p31, p32, u1, u2, u3, error, l_t, theta, gamma, calcError);
|
||||
estimateUKernel<<<grid, block, 0, stream>>>(I1wx, I1wy, grad, rho_c, p11, p12, p21, p22, p31, p32, u1, u2, u3, error, l_t, theta, gamma, calcError);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
if (!stream)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
}
|
||||
|
||||
@ -346,15 +353,19 @@ namespace tvl1flow
|
||||
}
|
||||
}
|
||||
|
||||
void estimateDualVariables(PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf u3, PtrStepSzf p11, PtrStepSzf p12, PtrStepSzf p21, PtrStepSzf p22, PtrStepSzf p31, PtrStepSzf p32, float taut, float gamma)
|
||||
void estimateDualVariables(PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf u3,
|
||||
PtrStepSzf p11, PtrStepSzf p12, PtrStepSzf p21, PtrStepSzf p22, PtrStepSzf p31, PtrStepSzf p32,
|
||||
float taut, float gamma,
|
||||
cudaStream_t stream)
|
||||
{
|
||||
const dim3 block(32, 8);
|
||||
const dim3 grid(divUp(u1.cols, block.x), divUp(u1.rows, block.y));
|
||||
|
||||
estimateDualVariablesKernel<<<grid, block>>>(u1, u2, u3, p11, p12, p21, p22, p31, p32, taut, gamma);
|
||||
estimateDualVariablesKernel<<<grid, block, 0, stream>>>(u1, u2, u3, p11, p12, p21, p22, p31, p32, taut, gamma);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
if (!stream)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -42,23 +42,21 @@
|
||||
|
||||
#include "precomp.hpp"
|
||||
|
||||
#define MIN_SIZE 32
|
||||
|
||||
#define S(x) StreamAccessor::getStream(x)
|
||||
|
||||
// CUDA resize() is fast, but it differs from the CPU analog. Disabling this flag
|
||||
// leads to an inefficient code. It's for debug purposes only.
|
||||
#define ENABLE_CUDA_RESIZE 1
|
||||
|
||||
using namespace cv;
|
||||
using namespace cv::cuda;
|
||||
|
||||
#if !defined HAVE_CUDA || defined(CUDA_DISABLER)
|
||||
|
||||
void cv::cuda::FarnebackOpticalFlow::operator ()(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
|
||||
Ptr<FarnebackOpticalFlow> cv::cuda::FarnebackOpticalFlow::create(int, double, bool, int, int, int, double, int) { throw_no_cuda(); return Ptr<BroxOpticalFlow>(); }
|
||||
|
||||
#else
|
||||
|
||||
#define MIN_SIZE 32
|
||||
|
||||
// CUDA resize() is fast, but it differs from the CPU analog. Disabling this flag
|
||||
// leads to an inefficient code. It's for debug purposes only.
|
||||
#define ENABLE_CUDA_RESIZE 1
|
||||
|
||||
namespace cv { namespace cuda { namespace device { namespace optflow_farneback
|
||||
{
|
||||
void setPolynomialExpansionConsts(
|
||||
@ -76,8 +74,6 @@ namespace cv { namespace cuda { namespace device { namespace optflow_farneback
|
||||
void updateFlowGpu(
|
||||
const PtrStepSzf M, PtrStepSzf flowx, PtrStepSzf flowy, cudaStream_t stream);
|
||||
|
||||
/*void boxFilterGpu(const PtrStepSzf src, int ksizeHalf, PtrStepSzf dst, cudaStream_t stream);*/
|
||||
|
||||
void boxFilter5Gpu(const PtrStepSzf src, int ksizeHalf, PtrStepSzf dst, cudaStream_t stream);
|
||||
|
||||
void boxFilter5Gpu_CC11(const PtrStepSzf src, int ksizeHalf, PtrStepSzf dst, cudaStream_t stream);
|
||||
@ -93,10 +89,93 @@ namespace cv { namespace cuda { namespace device { namespace optflow_farneback
|
||||
void gaussianBlur5Gpu_CC11(
|
||||
const PtrStepSzf src, int ksizeHalf, PtrStepSzf dst, int borderType, cudaStream_t stream);
|
||||
|
||||
}}}} // namespace cv { namespace cuda { namespace cudev { namespace optflow_farneback
|
||||
}}}}
|
||||
|
||||
namespace
|
||||
{
|
||||
class FarnebackOpticalFlowImpl : public FarnebackOpticalFlow
|
||||
{
|
||||
public:
|
||||
FarnebackOpticalFlowImpl(int numLevels, double pyrScale, bool fastPyramids, int winSize,
|
||||
int numIters, int polyN, double polySigma, int flags) :
|
||||
numLevels_(numLevels), pyrScale_(pyrScale), fastPyramids_(fastPyramids), winSize_(winSize),
|
||||
numIters_(numIters), polyN_(polyN), polySigma_(polySigma), flags_(flags)
|
||||
{
|
||||
}
|
||||
|
||||
virtual int getNumLevels() const { return numLevels_; }
|
||||
virtual void setNumLevels(int numLevels) { numLevels_ = numLevels; }
|
||||
|
||||
virtual double getPyrScale() const { return pyrScale_; }
|
||||
virtual void setPyrScale(double pyrScale) { pyrScale_ = pyrScale; }
|
||||
|
||||
virtual bool getFastPyramids() const { return fastPyramids_; }
|
||||
virtual void setFastPyramids(bool fastPyramids) { fastPyramids_ = fastPyramids; }
|
||||
|
||||
virtual int getWinSize() const { return winSize_; }
|
||||
virtual void setWinSize(int winSize) { winSize_ = winSize; }
|
||||
|
||||
virtual int getNumIters() const { return numIters_; }
|
||||
virtual void setNumIters(int numIters) { numIters_ = numIters; }
|
||||
|
||||
virtual int getPolyN() const { return polyN_; }
|
||||
virtual void setPolyN(int polyN) { polyN_ = polyN; }
|
||||
|
||||
virtual double getPolySigma() const { return polySigma_; }
|
||||
virtual void setPolySigma(double polySigma) { polySigma_ = polySigma; }
|
||||
|
||||
virtual int getFlags() const { return flags_; }
|
||||
virtual void setFlags(int flags) { flags_ = flags; }
|
||||
|
||||
virtual void calc(InputArray I0, InputArray I1, InputOutputArray flow, Stream& stream);
|
||||
|
||||
private:
|
||||
int numLevels_;
|
||||
double pyrScale_;
|
||||
bool fastPyramids_;
|
||||
int winSize_;
|
||||
int numIters_;
|
||||
int polyN_;
|
||||
double polySigma_;
|
||||
int flags_;
|
||||
|
||||
private:
|
||||
void prepareGaussian(
|
||||
int n, double sigma, float *g, float *xg, float *xxg,
|
||||
double &ig11, double &ig03, double &ig33, double &ig55);
|
||||
|
||||
void setPolynomialExpansionConsts(int n, double sigma);
|
||||
|
||||
void updateFlow_boxFilter(
|
||||
const GpuMat& R0, const GpuMat& R1, GpuMat& flowx, GpuMat &flowy,
|
||||
GpuMat& M, GpuMat &bufM, int blockSize, bool updateMatrices, Stream streams[]);
|
||||
|
||||
void updateFlow_gaussianBlur(
|
||||
const GpuMat& R0, const GpuMat& R1, GpuMat& flowx, GpuMat& flowy,
|
||||
GpuMat& M, GpuMat &bufM, int blockSize, bool updateMatrices, Stream streams[]);
|
||||
|
||||
void calcImpl(const GpuMat &frame0, const GpuMat &frame1, GpuMat &flowx, GpuMat &flowy, Stream &stream);
|
||||
|
||||
GpuMat frames_[2];
|
||||
GpuMat pyrLevel_[2], M_, bufM_, R_[2], blurredFrame_[2];
|
||||
std::vector<GpuMat> pyramid0_, pyramid1_;
|
||||
};
|
||||
|
||||
void FarnebackOpticalFlowImpl::calc(InputArray _frame0, InputArray _frame1, InputOutputArray _flow, Stream& stream)
|
||||
{
|
||||
const GpuMat frame0 = _frame0.getGpuMat();
|
||||
const GpuMat frame1 = _frame1.getGpuMat();
|
||||
|
||||
BufferPool pool(stream);
|
||||
GpuMat flowx = pool.getBuffer(frame0.size(), CV_32FC1);
|
||||
GpuMat flowy = pool.getBuffer(frame0.size(), CV_32FC1);
|
||||
|
||||
calcImpl(frame0, frame1, flowx, flowy, stream);
|
||||
|
||||
GpuMat flows[] = {flowx, flowy};
|
||||
cuda::merge(flows, 2, _flow, stream);
|
||||
}
|
||||
|
||||
GpuMat allocMatFromBuf(int rows, int cols, int type, GpuMat& mat)
|
||||
{
|
||||
if (!mat.empty() && mat.type() == type && mat.rows >= rows && mat.cols >= cols)
|
||||
@ -104,285 +183,287 @@ namespace
|
||||
|
||||
return mat = GpuMat(rows, cols, type);
|
||||
}
|
||||
}
|
||||
|
||||
void cv::cuda::FarnebackOpticalFlow::prepareGaussian(
|
||||
int n, double sigma, float *g, float *xg, float *xxg,
|
||||
double &ig11, double &ig03, double &ig33, double &ig55)
|
||||
{
|
||||
double s = 0.;
|
||||
for (int x = -n; x <= n; x++)
|
||||
{
|
||||
g[x] = (float)std::exp(-x*x/(2*sigma*sigma));
|
||||
s += g[x];
|
||||
}
|
||||
|
||||
s = 1./s;
|
||||
for (int x = -n; x <= n; x++)
|
||||
{
|
||||
g[x] = (float)(g[x]*s);
|
||||
xg[x] = (float)(x*g[x]);
|
||||
xxg[x] = (float)(x*x*g[x]);
|
||||
}
|
||||
|
||||
Mat_<double> G(6, 6);
|
||||
G.setTo(0);
|
||||
|
||||
for (int y = -n; y <= n; y++)
|
||||
void FarnebackOpticalFlowImpl::prepareGaussian(
|
||||
int n, double sigma, float *g, float *xg, float *xxg,
|
||||
double &ig11, double &ig03, double &ig33, double &ig55)
|
||||
{
|
||||
double s = 0.;
|
||||
for (int x = -n; x <= n; x++)
|
||||
{
|
||||
G(0,0) += g[y]*g[x];
|
||||
G(1,1) += g[y]*g[x]*x*x;
|
||||
G(3,3) += g[y]*g[x]*x*x*x*x;
|
||||
G(5,5) += g[y]*g[x]*x*x*y*y;
|
||||
}
|
||||
}
|
||||
|
||||
//G[0][0] = 1.;
|
||||
G(2,2) = G(0,3) = G(0,4) = G(3,0) = G(4,0) = G(1,1);
|
||||
G(4,4) = G(3,3);
|
||||
G(3,4) = G(4,3) = G(5,5);
|
||||
|
||||
// invG:
|
||||
// [ x e e ]
|
||||
// [ y ]
|
||||
// [ y ]
|
||||
// [ e z ]
|
||||
// [ e z ]
|
||||
// [ u ]
|
||||
Mat_<double> invG = G.inv(DECOMP_CHOLESKY);
|
||||
|
||||
ig11 = invG(1,1);
|
||||
ig03 = invG(0,3);
|
||||
ig33 = invG(3,3);
|
||||
ig55 = invG(5,5);
|
||||
}
|
||||
|
||||
|
||||
void cv::cuda::FarnebackOpticalFlow::setPolynomialExpansionConsts(int n, double sigma)
|
||||
{
|
||||
std::vector<float> buf(n*6 + 3);
|
||||
float* g = &buf[0] + n;
|
||||
float* xg = g + n*2 + 1;
|
||||
float* xxg = xg + n*2 + 1;
|
||||
|
||||
if (sigma < FLT_EPSILON)
|
||||
sigma = n*0.3;
|
||||
|
||||
double ig11, ig03, ig33, ig55;
|
||||
prepareGaussian(n, sigma, g, xg, xxg, ig11, ig03, ig33, ig55);
|
||||
|
||||
device::optflow_farneback::setPolynomialExpansionConsts(n, g, xg, xxg, static_cast<float>(ig11), static_cast<float>(ig03), static_cast<float>(ig33), static_cast<float>(ig55));
|
||||
}
|
||||
|
||||
|
||||
void cv::cuda::FarnebackOpticalFlow::updateFlow_boxFilter(
|
||||
const GpuMat& R0, const GpuMat& R1, GpuMat& flowx, GpuMat &flowy,
|
||||
GpuMat& M, GpuMat &bufM, int blockSize, bool updateMatrices, Stream streams[])
|
||||
{
|
||||
if (deviceSupports(FEATURE_SET_COMPUTE_12))
|
||||
device::optflow_farneback::boxFilter5Gpu(M, blockSize/2, bufM, S(streams[0]));
|
||||
else
|
||||
device::optflow_farneback::boxFilter5Gpu_CC11(M, blockSize/2, bufM, S(streams[0]));
|
||||
swap(M, bufM);
|
||||
|
||||
for (int i = 1; i < 5; ++i)
|
||||
streams[i].waitForCompletion();
|
||||
device::optflow_farneback::updateFlowGpu(M, flowx, flowy, S(streams[0]));
|
||||
|
||||
if (updateMatrices)
|
||||
device::optflow_farneback::updateMatricesGpu(flowx, flowy, R0, R1, M, S(streams[0]));
|
||||
}
|
||||
|
||||
|
||||
void cv::cuda::FarnebackOpticalFlow::updateFlow_gaussianBlur(
|
||||
const GpuMat& R0, const GpuMat& R1, GpuMat& flowx, GpuMat& flowy,
|
||||
GpuMat& M, GpuMat &bufM, int blockSize, bool updateMatrices, Stream streams[])
|
||||
{
|
||||
if (deviceSupports(FEATURE_SET_COMPUTE_12))
|
||||
device::optflow_farneback::gaussianBlur5Gpu(
|
||||
M, blockSize/2, bufM, BORDER_REPLICATE, S(streams[0]));
|
||||
else
|
||||
device::optflow_farneback::gaussianBlur5Gpu_CC11(
|
||||
M, blockSize/2, bufM, BORDER_REPLICATE, S(streams[0]));
|
||||
swap(M, bufM);
|
||||
|
||||
device::optflow_farneback::updateFlowGpu(M, flowx, flowy, S(streams[0]));
|
||||
|
||||
if (updateMatrices)
|
||||
device::optflow_farneback::updateMatricesGpu(flowx, flowy, R0, R1, M, S(streams[0]));
|
||||
}
|
||||
|
||||
|
||||
void cv::cuda::FarnebackOpticalFlow::operator ()(
|
||||
const GpuMat &frame0, const GpuMat &frame1, GpuMat &flowx, GpuMat &flowy, Stream &s)
|
||||
{
|
||||
CV_Assert(frame0.channels() == 1 && frame1.channels() == 1);
|
||||
CV_Assert(frame0.size() == frame1.size());
|
||||
CV_Assert(polyN == 5 || polyN == 7);
|
||||
CV_Assert(!fastPyramids || std::abs(pyrScale - 0.5) < 1e-6);
|
||||
|
||||
Stream streams[5];
|
||||
if (S(s))
|
||||
streams[0] = s;
|
||||
|
||||
Size size = frame0.size();
|
||||
GpuMat prevFlowX, prevFlowY, curFlowX, curFlowY;
|
||||
|
||||
flowx.create(size, CV_32F);
|
||||
flowy.create(size, CV_32F);
|
||||
GpuMat flowx0 = flowx;
|
||||
GpuMat flowy0 = flowy;
|
||||
|
||||
// Crop unnecessary levels
|
||||
double scale = 1;
|
||||
int numLevelsCropped = 0;
|
||||
for (; numLevelsCropped < numLevels; numLevelsCropped++)
|
||||
{
|
||||
scale *= pyrScale;
|
||||
if (size.width*scale < MIN_SIZE || size.height*scale < MIN_SIZE)
|
||||
break;
|
||||
}
|
||||
|
||||
frame0.convertTo(frames_[0], CV_32F, streams[0]);
|
||||
frame1.convertTo(frames_[1], CV_32F, streams[1]);
|
||||
|
||||
if (fastPyramids)
|
||||
{
|
||||
// Build Gaussian pyramids using pyrDown()
|
||||
pyramid0_.resize(numLevelsCropped + 1);
|
||||
pyramid1_.resize(numLevelsCropped + 1);
|
||||
pyramid0_[0] = frames_[0];
|
||||
pyramid1_[0] = frames_[1];
|
||||
for (int i = 1; i <= numLevelsCropped; ++i)
|
||||
{
|
||||
cuda::pyrDown(pyramid0_[i - 1], pyramid0_[i], streams[0]);
|
||||
cuda::pyrDown(pyramid1_[i - 1], pyramid1_[i], streams[1]);
|
||||
}
|
||||
}
|
||||
|
||||
setPolynomialExpansionConsts(polyN, polySigma);
|
||||
device::optflow_farneback::setUpdateMatricesConsts();
|
||||
|
||||
for (int k = numLevelsCropped; k >= 0; k--)
|
||||
{
|
||||
streams[0].waitForCompletion();
|
||||
|
||||
scale = 1;
|
||||
for (int i = 0; i < k; i++)
|
||||
scale *= pyrScale;
|
||||
|
||||
double sigma = (1./scale - 1) * 0.5;
|
||||
int smoothSize = cvRound(sigma*5) | 1;
|
||||
smoothSize = std::max(smoothSize, 3);
|
||||
|
||||
int width = cvRound(size.width*scale);
|
||||
int height = cvRound(size.height*scale);
|
||||
|
||||
if (fastPyramids)
|
||||
{
|
||||
width = pyramid0_[k].cols;
|
||||
height = pyramid0_[k].rows;
|
||||
g[x] = (float)std::exp(-x*x/(2*sigma*sigma));
|
||||
s += g[x];
|
||||
}
|
||||
|
||||
if (k > 0)
|
||||
s = 1./s;
|
||||
for (int x = -n; x <= n; x++)
|
||||
{
|
||||
curFlowX.create(height, width, CV_32F);
|
||||
curFlowY.create(height, width, CV_32F);
|
||||
}
|
||||
else
|
||||
{
|
||||
curFlowX = flowx0;
|
||||
curFlowY = flowy0;
|
||||
g[x] = (float)(g[x]*s);
|
||||
xg[x] = (float)(x*g[x]);
|
||||
xxg[x] = (float)(x*x*g[x]);
|
||||
}
|
||||
|
||||
if (!prevFlowX.data)
|
||||
Mat_<double> G(6, 6);
|
||||
G.setTo(0);
|
||||
|
||||
for (int y = -n; y <= n; y++)
|
||||
{
|
||||
if (flags & OPTFLOW_USE_INITIAL_FLOW)
|
||||
for (int x = -n; x <= n; x++)
|
||||
{
|
||||
cuda::resize(flowx0, curFlowX, Size(width, height), 0, 0, INTER_LINEAR, streams[0]);
|
||||
cuda::resize(flowy0, curFlowY, Size(width, height), 0, 0, INTER_LINEAR, streams[1]);
|
||||
curFlowX.convertTo(curFlowX, curFlowX.depth(), scale, streams[0]);
|
||||
curFlowY.convertTo(curFlowY, curFlowY.depth(), scale, streams[1]);
|
||||
G(0,0) += g[y]*g[x];
|
||||
G(1,1) += g[y]*g[x]*x*x;
|
||||
G(3,3) += g[y]*g[x]*x*x*x*x;
|
||||
G(5,5) += g[y]*g[x]*x*x*y*y;
|
||||
}
|
||||
}
|
||||
|
||||
//G[0][0] = 1.;
|
||||
G(2,2) = G(0,3) = G(0,4) = G(3,0) = G(4,0) = G(1,1);
|
||||
G(4,4) = G(3,3);
|
||||
G(3,4) = G(4,3) = G(5,5);
|
||||
|
||||
// invG:
|
||||
// [ x e e ]
|
||||
// [ y ]
|
||||
// [ y ]
|
||||
// [ e z ]
|
||||
// [ e z ]
|
||||
// [ u ]
|
||||
Mat_<double> invG = G.inv(DECOMP_CHOLESKY);
|
||||
|
||||
ig11 = invG(1,1);
|
||||
ig03 = invG(0,3);
|
||||
ig33 = invG(3,3);
|
||||
ig55 = invG(5,5);
|
||||
}
|
||||
|
||||
void FarnebackOpticalFlowImpl::setPolynomialExpansionConsts(int n, double sigma)
|
||||
{
|
||||
std::vector<float> buf(n*6 + 3);
|
||||
float* g = &buf[0] + n;
|
||||
float* xg = g + n*2 + 1;
|
||||
float* xxg = xg + n*2 + 1;
|
||||
|
||||
if (sigma < FLT_EPSILON)
|
||||
sigma = n*0.3;
|
||||
|
||||
double ig11, ig03, ig33, ig55;
|
||||
prepareGaussian(n, sigma, g, xg, xxg, ig11, ig03, ig33, ig55);
|
||||
|
||||
device::optflow_farneback::setPolynomialExpansionConsts(n, g, xg, xxg, static_cast<float>(ig11), static_cast<float>(ig03), static_cast<float>(ig33), static_cast<float>(ig55));
|
||||
}
|
||||
|
||||
void FarnebackOpticalFlowImpl::updateFlow_boxFilter(
|
||||
const GpuMat& R0, const GpuMat& R1, GpuMat& flowx, GpuMat &flowy,
|
||||
GpuMat& M, GpuMat &bufM, int blockSize, bool updateMatrices, Stream streams[])
|
||||
{
|
||||
if (deviceSupports(FEATURE_SET_COMPUTE_12))
|
||||
device::optflow_farneback::boxFilter5Gpu(M, blockSize/2, bufM, StreamAccessor::getStream(streams[0]));
|
||||
else
|
||||
device::optflow_farneback::boxFilter5Gpu_CC11(M, blockSize/2, bufM, StreamAccessor::getStream(streams[0]));
|
||||
swap(M, bufM);
|
||||
|
||||
for (int i = 1; i < 5; ++i)
|
||||
streams[i].waitForCompletion();
|
||||
device::optflow_farneback::updateFlowGpu(M, flowx, flowy, StreamAccessor::getStream(streams[0]));
|
||||
|
||||
if (updateMatrices)
|
||||
device::optflow_farneback::updateMatricesGpu(flowx, flowy, R0, R1, M, StreamAccessor::getStream(streams[0]));
|
||||
}
|
||||
|
||||
void FarnebackOpticalFlowImpl::updateFlow_gaussianBlur(
|
||||
const GpuMat& R0, const GpuMat& R1, GpuMat& flowx, GpuMat& flowy,
|
||||
GpuMat& M, GpuMat &bufM, int blockSize, bool updateMatrices, Stream streams[])
|
||||
{
|
||||
if (deviceSupports(FEATURE_SET_COMPUTE_12))
|
||||
device::optflow_farneback::gaussianBlur5Gpu(
|
||||
M, blockSize/2, bufM, BORDER_REPLICATE, StreamAccessor::getStream(streams[0]));
|
||||
else
|
||||
device::optflow_farneback::gaussianBlur5Gpu_CC11(
|
||||
M, blockSize/2, bufM, BORDER_REPLICATE, StreamAccessor::getStream(streams[0]));
|
||||
swap(M, bufM);
|
||||
|
||||
device::optflow_farneback::updateFlowGpu(M, flowx, flowy, StreamAccessor::getStream(streams[0]));
|
||||
|
||||
if (updateMatrices)
|
||||
device::optflow_farneback::updateMatricesGpu(flowx, flowy, R0, R1, M, StreamAccessor::getStream(streams[0]));
|
||||
}
|
||||
|
||||
void FarnebackOpticalFlowImpl::calcImpl(const GpuMat &frame0, const GpuMat &frame1, GpuMat &flowx, GpuMat &flowy, Stream &stream)
|
||||
{
|
||||
CV_Assert(frame0.channels() == 1 && frame1.channels() == 1);
|
||||
CV_Assert(frame0.size() == frame1.size());
|
||||
CV_Assert(polyN_ == 5 || polyN_ == 7);
|
||||
CV_Assert(!fastPyramids_ || std::abs(pyrScale_ - 0.5) < 1e-6);
|
||||
|
||||
Stream streams[5];
|
||||
if (stream)
|
||||
streams[0] = stream;
|
||||
|
||||
Size size = frame0.size();
|
||||
GpuMat prevFlowX, prevFlowY, curFlowX, curFlowY;
|
||||
|
||||
flowx.create(size, CV_32F);
|
||||
flowy.create(size, CV_32F);
|
||||
GpuMat flowx0 = flowx;
|
||||
GpuMat flowy0 = flowy;
|
||||
|
||||
// Crop unnecessary levels
|
||||
double scale = 1;
|
||||
int numLevelsCropped = 0;
|
||||
for (; numLevelsCropped < numLevels_; numLevelsCropped++)
|
||||
{
|
||||
scale *= pyrScale_;
|
||||
if (size.width*scale < MIN_SIZE || size.height*scale < MIN_SIZE)
|
||||
break;
|
||||
}
|
||||
|
||||
frame0.convertTo(frames_[0], CV_32F, streams[0]);
|
||||
frame1.convertTo(frames_[1], CV_32F, streams[1]);
|
||||
|
||||
if (fastPyramids_)
|
||||
{
|
||||
// Build Gaussian pyramids using pyrDown()
|
||||
pyramid0_.resize(numLevelsCropped + 1);
|
||||
pyramid1_.resize(numLevelsCropped + 1);
|
||||
pyramid0_[0] = frames_[0];
|
||||
pyramid1_[0] = frames_[1];
|
||||
for (int i = 1; i <= numLevelsCropped; ++i)
|
||||
{
|
||||
cuda::pyrDown(pyramid0_[i - 1], pyramid0_[i], streams[0]);
|
||||
cuda::pyrDown(pyramid1_[i - 1], pyramid1_[i], streams[1]);
|
||||
}
|
||||
}
|
||||
|
||||
setPolynomialExpansionConsts(polyN_, polySigma_);
|
||||
device::optflow_farneback::setUpdateMatricesConsts();
|
||||
|
||||
for (int k = numLevelsCropped; k >= 0; k--)
|
||||
{
|
||||
streams[0].waitForCompletion();
|
||||
|
||||
scale = 1;
|
||||
for (int i = 0; i < k; i++)
|
||||
scale *= pyrScale_;
|
||||
|
||||
double sigma = (1./scale - 1) * 0.5;
|
||||
int smoothSize = cvRound(sigma*5) | 1;
|
||||
smoothSize = std::max(smoothSize, 3);
|
||||
|
||||
int width = cvRound(size.width*scale);
|
||||
int height = cvRound(size.height*scale);
|
||||
|
||||
if (fastPyramids_)
|
||||
{
|
||||
width = pyramid0_[k].cols;
|
||||
height = pyramid0_[k].rows;
|
||||
}
|
||||
|
||||
if (k > 0)
|
||||
{
|
||||
curFlowX.create(height, width, CV_32F);
|
||||
curFlowY.create(height, width, CV_32F);
|
||||
}
|
||||
else
|
||||
{
|
||||
curFlowX.setTo(0, streams[0]);
|
||||
curFlowY.setTo(0, streams[1]);
|
||||
curFlowX = flowx0;
|
||||
curFlowY = flowy0;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
cuda::resize(prevFlowX, curFlowX, Size(width, height), 0, 0, INTER_LINEAR, streams[0]);
|
||||
cuda::resize(prevFlowY, curFlowY, Size(width, height), 0, 0, INTER_LINEAR, streams[1]);
|
||||
curFlowX.convertTo(curFlowX, curFlowX.depth(), 1./pyrScale, streams[0]);
|
||||
curFlowY.convertTo(curFlowY, curFlowY.depth(), 1./pyrScale, streams[1]);
|
||||
}
|
||||
|
||||
GpuMat M = allocMatFromBuf(5*height, width, CV_32F, M_);
|
||||
GpuMat bufM = allocMatFromBuf(5*height, width, CV_32F, bufM_);
|
||||
GpuMat R[2] =
|
||||
{
|
||||
allocMatFromBuf(5*height, width, CV_32F, R_[0]),
|
||||
allocMatFromBuf(5*height, width, CV_32F, R_[1])
|
||||
};
|
||||
|
||||
if (fastPyramids)
|
||||
{
|
||||
device::optflow_farneback::polynomialExpansionGpu(pyramid0_[k], polyN, R[0], S(streams[0]));
|
||||
device::optflow_farneback::polynomialExpansionGpu(pyramid1_[k], polyN, R[1], S(streams[1]));
|
||||
}
|
||||
else
|
||||
{
|
||||
GpuMat blurredFrame[2] =
|
||||
if (!prevFlowX.data)
|
||||
{
|
||||
allocMatFromBuf(size.height, size.width, CV_32F, blurredFrame_[0]),
|
||||
allocMatFromBuf(size.height, size.width, CV_32F, blurredFrame_[1])
|
||||
};
|
||||
GpuMat pyrLevel[2] =
|
||||
{
|
||||
allocMatFromBuf(height, width, CV_32F, pyrLevel_[0]),
|
||||
allocMatFromBuf(height, width, CV_32F, pyrLevel_[1])
|
||||
};
|
||||
|
||||
Mat g = getGaussianKernel(smoothSize, sigma, CV_32F);
|
||||
device::optflow_farneback::setGaussianBlurKernel(g.ptr<float>(smoothSize/2), smoothSize/2);
|
||||
|
||||
for (int i = 0; i < 2; i++)
|
||||
{
|
||||
device::optflow_farneback::gaussianBlurGpu(
|
||||
frames_[i], smoothSize/2, blurredFrame[i], BORDER_REFLECT101, S(streams[i]));
|
||||
cuda::resize(blurredFrame[i], pyrLevel[i], Size(width, height), 0.0, 0.0, INTER_LINEAR, streams[i]);
|
||||
device::optflow_farneback::polynomialExpansionGpu(pyrLevel[i], polyN, R[i], S(streams[i]));
|
||||
if (flags_ & OPTFLOW_USE_INITIAL_FLOW)
|
||||
{
|
||||
cuda::resize(flowx0, curFlowX, Size(width, height), 0, 0, INTER_LINEAR, streams[0]);
|
||||
cuda::resize(flowy0, curFlowY, Size(width, height), 0, 0, INTER_LINEAR, streams[1]);
|
||||
curFlowX.convertTo(curFlowX, curFlowX.depth(), scale, streams[0]);
|
||||
curFlowY.convertTo(curFlowY, curFlowY.depth(), scale, streams[1]);
|
||||
}
|
||||
else
|
||||
{
|
||||
curFlowX.setTo(0, streams[0]);
|
||||
curFlowY.setTo(0, streams[1]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
streams[1].waitForCompletion();
|
||||
device::optflow_farneback::updateMatricesGpu(curFlowX, curFlowY, R[0], R[1], M, S(streams[0]));
|
||||
|
||||
if (flags & OPTFLOW_FARNEBACK_GAUSSIAN)
|
||||
{
|
||||
Mat g = getGaussianKernel(winSize, winSize/2*0.3f, CV_32F);
|
||||
device::optflow_farneback::setGaussianBlurKernel(g.ptr<float>(winSize/2), winSize/2);
|
||||
}
|
||||
for (int i = 0; i < numIters; i++)
|
||||
{
|
||||
if (flags & OPTFLOW_FARNEBACK_GAUSSIAN)
|
||||
updateFlow_gaussianBlur(R[0], R[1], curFlowX, curFlowY, M, bufM, winSize, i < numIters-1, streams);
|
||||
else
|
||||
updateFlow_boxFilter(R[0], R[1], curFlowX, curFlowY, M, bufM, winSize, i < numIters-1, streams);
|
||||
{
|
||||
cuda::resize(prevFlowX, curFlowX, Size(width, height), 0, 0, INTER_LINEAR, streams[0]);
|
||||
cuda::resize(prevFlowY, curFlowY, Size(width, height), 0, 0, INTER_LINEAR, streams[1]);
|
||||
curFlowX.convertTo(curFlowX, curFlowX.depth(), 1./pyrScale_, streams[0]);
|
||||
curFlowY.convertTo(curFlowY, curFlowY.depth(), 1./pyrScale_, streams[1]);
|
||||
}
|
||||
|
||||
GpuMat M = allocMatFromBuf(5*height, width, CV_32F, M_);
|
||||
GpuMat bufM = allocMatFromBuf(5*height, width, CV_32F, bufM_);
|
||||
GpuMat R[2] =
|
||||
{
|
||||
allocMatFromBuf(5*height, width, CV_32F, R_[0]),
|
||||
allocMatFromBuf(5*height, width, CV_32F, R_[1])
|
||||
};
|
||||
|
||||
if (fastPyramids_)
|
||||
{
|
||||
device::optflow_farneback::polynomialExpansionGpu(pyramid0_[k], polyN_, R[0], StreamAccessor::getStream(streams[0]));
|
||||
device::optflow_farneback::polynomialExpansionGpu(pyramid1_[k], polyN_, R[1], StreamAccessor::getStream(streams[1]));
|
||||
}
|
||||
else
|
||||
{
|
||||
GpuMat blurredFrame[2] =
|
||||
{
|
||||
allocMatFromBuf(size.height, size.width, CV_32F, blurredFrame_[0]),
|
||||
allocMatFromBuf(size.height, size.width, CV_32F, blurredFrame_[1])
|
||||
};
|
||||
GpuMat pyrLevel[2] =
|
||||
{
|
||||
allocMatFromBuf(height, width, CV_32F, pyrLevel_[0]),
|
||||
allocMatFromBuf(height, width, CV_32F, pyrLevel_[1])
|
||||
};
|
||||
|
||||
Mat g = getGaussianKernel(smoothSize, sigma, CV_32F);
|
||||
device::optflow_farneback::setGaussianBlurKernel(g.ptr<float>(smoothSize/2), smoothSize/2);
|
||||
|
||||
for (int i = 0; i < 2; i++)
|
||||
{
|
||||
device::optflow_farneback::gaussianBlurGpu(
|
||||
frames_[i], smoothSize/2, blurredFrame[i], BORDER_REFLECT101, StreamAccessor::getStream(streams[i]));
|
||||
cuda::resize(blurredFrame[i], pyrLevel[i], Size(width, height), 0.0, 0.0, INTER_LINEAR, streams[i]);
|
||||
device::optflow_farneback::polynomialExpansionGpu(pyrLevel[i], polyN_, R[i], StreamAccessor::getStream(streams[i]));
|
||||
}
|
||||
}
|
||||
|
||||
streams[1].waitForCompletion();
|
||||
device::optflow_farneback::updateMatricesGpu(curFlowX, curFlowY, R[0], R[1], M, StreamAccessor::getStream(streams[0]));
|
||||
|
||||
if (flags_ & OPTFLOW_FARNEBACK_GAUSSIAN)
|
||||
{
|
||||
Mat g = getGaussianKernel(winSize_, winSize_/2*0.3f, CV_32F);
|
||||
device::optflow_farneback::setGaussianBlurKernel(g.ptr<float>(winSize_/2), winSize_/2);
|
||||
}
|
||||
for (int i = 0; i < numIters_; i++)
|
||||
{
|
||||
if (flags_ & OPTFLOW_FARNEBACK_GAUSSIAN)
|
||||
updateFlow_gaussianBlur(R[0], R[1], curFlowX, curFlowY, M, bufM, winSize_, i < numIters_-1, streams);
|
||||
else
|
||||
updateFlow_boxFilter(R[0], R[1], curFlowX, curFlowY, M, bufM, winSize_, i < numIters_-1, streams);
|
||||
}
|
||||
|
||||
prevFlowX = curFlowX;
|
||||
prevFlowY = curFlowY;
|
||||
}
|
||||
|
||||
prevFlowX = curFlowX;
|
||||
prevFlowY = curFlowY;
|
||||
flowx = curFlowX;
|
||||
flowy = curFlowY;
|
||||
|
||||
if (!stream)
|
||||
streams[0].waitForCompletion();
|
||||
}
|
||||
}
|
||||
|
||||
flowx = curFlowX;
|
||||
flowy = curFlowY;
|
||||
|
||||
if (!S(s))
|
||||
streams[0].waitForCompletion();
|
||||
Ptr<FarnebackOpticalFlow> cv::cuda::FarnebackOpticalFlow::create(int numLevels, double pyrScale, bool fastPyramids, int winSize,
|
||||
int numIters, int polyN, double polySigma, int flags)
|
||||
{
|
||||
return makePtr<FarnebackOpticalFlowImpl>(numLevels, pyrScale, fastPyramids, winSize,
|
||||
numIters, polyN, polySigma, flags);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -47,37 +47,54 @@ using namespace cv::cuda;
|
||||
|
||||
#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
|
||||
|
||||
cv::cuda::PyrLKOpticalFlow::PyrLKOpticalFlow() { throw_no_cuda(); }
|
||||
void cv::cuda::PyrLKOpticalFlow::sparse(const GpuMat&, const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, GpuMat*) { throw_no_cuda(); }
|
||||
void cv::cuda::PyrLKOpticalFlow::dense(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, GpuMat*) { throw_no_cuda(); }
|
||||
void cv::cuda::PyrLKOpticalFlow::releaseMemory() {}
|
||||
Ptr<SparsePyrLKOpticalFlow> cv::cuda::SparsePyrLKOpticalFlow::create(Size, int, int, bool) { throw_no_cuda(); return Ptr<SparsePyrLKOpticalFlow>(); }
|
||||
|
||||
Ptr<DensePyrLKOpticalFlow> cv::cuda::DensePyrLKOpticalFlow::create(Size, int, int, bool) { throw_no_cuda(); return Ptr<SparsePyrLKOpticalFlow>(); }
|
||||
|
||||
#else /* !defined (HAVE_CUDA) */
|
||||
|
||||
namespace pyrlk
|
||||
{
|
||||
void loadConstants(int2 winSize, int iters);
|
||||
void loadConstants(int2 winSize, int iters, cudaStream_t stream);
|
||||
|
||||
void sparse1(PtrStepSzf I, PtrStepSzf J, const float2* prevPts, float2* nextPts, uchar* status, float* err, int ptcount,
|
||||
int level, dim3 block, dim3 patch, cudaStream_t stream = 0);
|
||||
int level, dim3 block, dim3 patch, cudaStream_t stream);
|
||||
void sparse4(PtrStepSz<float4> I, PtrStepSz<float4> J, const float2* prevPts, float2* nextPts, uchar* status, float* err, int ptcount,
|
||||
int level, dim3 block, dim3 patch, cudaStream_t stream = 0);
|
||||
int level, dim3 block, dim3 patch, cudaStream_t stream);
|
||||
|
||||
void dense(PtrStepSzb I, PtrStepSzf J, PtrStepSzf u, PtrStepSzf v, PtrStepSzf prevU, PtrStepSzf prevV,
|
||||
PtrStepSzf err, int2 winSize, cudaStream_t stream = 0);
|
||||
}
|
||||
|
||||
cv::cuda::PyrLKOpticalFlow::PyrLKOpticalFlow()
|
||||
{
|
||||
winSize = Size(21, 21);
|
||||
maxLevel = 3;
|
||||
iters = 30;
|
||||
useInitialFlow = false;
|
||||
PtrStepSzf err, int2 winSize, cudaStream_t stream);
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
void calcPatchSize(cv::Size winSize, dim3& block, dim3& patch)
|
||||
class PyrLKOpticalFlowBase
|
||||
{
|
||||
public:
|
||||
PyrLKOpticalFlowBase(Size winSize, int maxLevel, int iters, bool useInitialFlow);
|
||||
|
||||
void sparse(const GpuMat& prevImg, const GpuMat& nextImg, const GpuMat& prevPts, GpuMat& nextPts,
|
||||
GpuMat& status, GpuMat* err, Stream& stream);
|
||||
|
||||
void dense(const GpuMat& prevImg, const GpuMat& nextImg, GpuMat& u, GpuMat& v, Stream& stream);
|
||||
|
||||
protected:
|
||||
Size winSize_;
|
||||
int maxLevel_;
|
||||
int iters_;
|
||||
bool useInitialFlow_;
|
||||
|
||||
private:
|
||||
std::vector<GpuMat> prevPyr_;
|
||||
std::vector<GpuMat> nextPyr_;
|
||||
};
|
||||
|
||||
PyrLKOpticalFlowBase::PyrLKOpticalFlowBase(Size winSize, int maxLevel, int iters, bool useInitialFlow) :
|
||||
winSize_(winSize), maxLevel_(maxLevel), iters_(iters), useInitialFlow_(useInitialFlow)
|
||||
{
|
||||
}
|
||||
|
||||
void calcPatchSize(Size winSize, dim3& block, dim3& patch)
|
||||
{
|
||||
if (winSize.width > 32 && winSize.width > 2 * winSize.height)
|
||||
{
|
||||
@ -95,156 +112,239 @@ namespace
|
||||
|
||||
block.z = patch.z = 1;
|
||||
}
|
||||
}
|
||||
|
||||
void cv::cuda::PyrLKOpticalFlow::sparse(const GpuMat& prevImg, const GpuMat& nextImg, const GpuMat& prevPts, GpuMat& nextPts, GpuMat& status, GpuMat* err)
|
||||
{
|
||||
if (prevPts.empty())
|
||||
void PyrLKOpticalFlowBase::sparse(const GpuMat& prevImg, const GpuMat& nextImg, const GpuMat& prevPts, GpuMat& nextPts, GpuMat& status, GpuMat* err, Stream& stream)
|
||||
{
|
||||
nextPts.release();
|
||||
status.release();
|
||||
if (err) err->release();
|
||||
return;
|
||||
}
|
||||
|
||||
dim3 block, patch;
|
||||
calcPatchSize(winSize, block, patch);
|
||||
|
||||
CV_Assert(prevImg.channels() == 1 || prevImg.channels() == 3 || prevImg.channels() == 4);
|
||||
CV_Assert(prevImg.size() == nextImg.size() && prevImg.type() == nextImg.type());
|
||||
CV_Assert(maxLevel >= 0);
|
||||
CV_Assert(winSize.width > 2 && winSize.height > 2);
|
||||
CV_Assert(patch.x > 0 && patch.x < 6 && patch.y > 0 && patch.y < 6);
|
||||
CV_Assert(prevPts.rows == 1 && prevPts.type() == CV_32FC2);
|
||||
|
||||
if (useInitialFlow)
|
||||
CV_Assert(nextPts.size() == prevPts.size() && nextPts.type() == CV_32FC2);
|
||||
else
|
||||
ensureSizeIsEnough(1, prevPts.cols, prevPts.type(), nextPts);
|
||||
|
||||
GpuMat temp1 = (useInitialFlow ? nextPts : prevPts).reshape(1);
|
||||
GpuMat temp2 = nextPts.reshape(1);
|
||||
cuda::multiply(temp1, Scalar::all(1.0 / (1 << maxLevel) / 2.0), temp2);
|
||||
|
||||
ensureSizeIsEnough(1, prevPts.cols, CV_8UC1, status);
|
||||
status.setTo(Scalar::all(1));
|
||||
|
||||
if (err)
|
||||
ensureSizeIsEnough(1, prevPts.cols, CV_32FC1, *err);
|
||||
|
||||
// build the image pyramids.
|
||||
|
||||
prevPyr_.resize(maxLevel + 1);
|
||||
nextPyr_.resize(maxLevel + 1);
|
||||
|
||||
int cn = prevImg.channels();
|
||||
|
||||
if (cn == 1 || cn == 4)
|
||||
{
|
||||
prevImg.convertTo(prevPyr_[0], CV_32F);
|
||||
nextImg.convertTo(nextPyr_[0], CV_32F);
|
||||
}
|
||||
else
|
||||
{
|
||||
cuda::cvtColor(prevImg, buf_, COLOR_BGR2BGRA);
|
||||
buf_.convertTo(prevPyr_[0], CV_32F);
|
||||
|
||||
cuda::cvtColor(nextImg, buf_, COLOR_BGR2BGRA);
|
||||
buf_.convertTo(nextPyr_[0], CV_32F);
|
||||
}
|
||||
|
||||
for (int level = 1; level <= maxLevel; ++level)
|
||||
{
|
||||
cuda::pyrDown(prevPyr_[level - 1], prevPyr_[level]);
|
||||
cuda::pyrDown(nextPyr_[level - 1], nextPyr_[level]);
|
||||
}
|
||||
|
||||
pyrlk::loadConstants(make_int2(winSize.width, winSize.height), iters);
|
||||
|
||||
for (int level = maxLevel; level >= 0; level--)
|
||||
{
|
||||
if (cn == 1)
|
||||
if (prevPts.empty())
|
||||
{
|
||||
pyrlk::sparse1(prevPyr_[level], nextPyr_[level],
|
||||
prevPts.ptr<float2>(), nextPts.ptr<float2>(), status.ptr(), level == 0 && err ? err->ptr<float>() : 0, prevPts.cols,
|
||||
level, block, patch);
|
||||
nextPts.release();
|
||||
status.release();
|
||||
if (err) err->release();
|
||||
return;
|
||||
}
|
||||
|
||||
dim3 block, patch;
|
||||
calcPatchSize(winSize_, block, patch);
|
||||
|
||||
CV_Assert( prevImg.channels() == 1 || prevImg.channels() == 3 || prevImg.channels() == 4 );
|
||||
CV_Assert( prevImg.size() == nextImg.size() && prevImg.type() == nextImg.type() );
|
||||
CV_Assert( maxLevel_ >= 0 );
|
||||
CV_Assert( winSize_.width > 2 && winSize_.height > 2 );
|
||||
CV_Assert( patch.x > 0 && patch.x < 6 && patch.y > 0 && patch.y < 6 );
|
||||
CV_Assert( prevPts.rows == 1 && prevPts.type() == CV_32FC2 );
|
||||
|
||||
if (useInitialFlow_)
|
||||
CV_Assert( nextPts.size() == prevPts.size() && nextPts.type() == prevPts.type() );
|
||||
else
|
||||
ensureSizeIsEnough(1, prevPts.cols, prevPts.type(), nextPts);
|
||||
|
||||
GpuMat temp1 = (useInitialFlow_ ? nextPts : prevPts).reshape(1);
|
||||
GpuMat temp2 = nextPts.reshape(1);
|
||||
cuda::multiply(temp1, Scalar::all(1.0 / (1 << maxLevel_) / 2.0), temp2, 1, -1, stream);
|
||||
|
||||
ensureSizeIsEnough(1, prevPts.cols, CV_8UC1, status);
|
||||
status.setTo(Scalar::all(1), stream);
|
||||
|
||||
if (err)
|
||||
ensureSizeIsEnough(1, prevPts.cols, CV_32FC1, *err);
|
||||
|
||||
// build the image pyramids.
|
||||
|
||||
BufferPool pool(stream);
|
||||
|
||||
prevPyr_.resize(maxLevel_ + 1);
|
||||
nextPyr_.resize(maxLevel_ + 1);
|
||||
|
||||
int cn = prevImg.channels();
|
||||
|
||||
if (cn == 1 || cn == 4)
|
||||
{
|
||||
prevImg.convertTo(prevPyr_[0], CV_32F, stream);
|
||||
nextImg.convertTo(nextPyr_[0], CV_32F, stream);
|
||||
}
|
||||
else
|
||||
{
|
||||
pyrlk::sparse4(prevPyr_[level], nextPyr_[level],
|
||||
prevPts.ptr<float2>(), nextPts.ptr<float2>(), status.ptr(), level == 0 && err ? err->ptr<float>() : 0, prevPts.cols,
|
||||
level, block, patch);
|
||||
GpuMat buf = pool.getBuffer(prevImg.size(), CV_MAKE_TYPE(prevImg.depth(), 4));
|
||||
|
||||
cuda::cvtColor(prevImg, buf, COLOR_BGR2BGRA, 0, stream);
|
||||
buf.convertTo(prevPyr_[0], CV_32F, stream);
|
||||
|
||||
cuda::cvtColor(nextImg, buf, COLOR_BGR2BGRA, 0, stream);
|
||||
buf.convertTo(nextPyr_[0], CV_32F, stream);
|
||||
}
|
||||
|
||||
for (int level = 1; level <= maxLevel_; ++level)
|
||||
{
|
||||
cuda::pyrDown(prevPyr_[level - 1], prevPyr_[level], stream);
|
||||
cuda::pyrDown(nextPyr_[level - 1], nextPyr_[level], stream);
|
||||
}
|
||||
|
||||
pyrlk::loadConstants(make_int2(winSize_.width, winSize_.height), iters_, StreamAccessor::getStream(stream));
|
||||
|
||||
for (int level = maxLevel_; level >= 0; level--)
|
||||
{
|
||||
if (cn == 1)
|
||||
{
|
||||
pyrlk::sparse1(prevPyr_[level], nextPyr_[level],
|
||||
prevPts.ptr<float2>(), nextPts.ptr<float2>(),
|
||||
status.ptr(),
|
||||
level == 0 && err ? err->ptr<float>() : 0, prevPts.cols,
|
||||
level, block, patch,
|
||||
StreamAccessor::getStream(stream));
|
||||
}
|
||||
else
|
||||
{
|
||||
pyrlk::sparse4(prevPyr_[level], nextPyr_[level],
|
||||
prevPts.ptr<float2>(), nextPts.ptr<float2>(),
|
||||
status.ptr(),
|
||||
level == 0 && err ? err->ptr<float>() : 0, prevPts.cols,
|
||||
level, block, patch,
|
||||
StreamAccessor::getStream(stream));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void cv::cuda::PyrLKOpticalFlow::dense(const GpuMat& prevImg, const GpuMat& nextImg, GpuMat& u, GpuMat& v, GpuMat* err)
|
||||
{
|
||||
CV_Assert(prevImg.type() == CV_8UC1);
|
||||
CV_Assert(prevImg.size() == nextImg.size() && prevImg.type() == nextImg.type());
|
||||
CV_Assert(maxLevel >= 0);
|
||||
CV_Assert(winSize.width > 2 && winSize.height > 2);
|
||||
|
||||
if (err)
|
||||
err->create(prevImg.size(), CV_32FC1);
|
||||
|
||||
// build the image pyramids.
|
||||
|
||||
prevPyr_.resize(maxLevel + 1);
|
||||
nextPyr_.resize(maxLevel + 1);
|
||||
|
||||
prevPyr_[0] = prevImg;
|
||||
nextImg.convertTo(nextPyr_[0], CV_32F);
|
||||
|
||||
for (int level = 1; level <= maxLevel; ++level)
|
||||
void PyrLKOpticalFlowBase::dense(const GpuMat& prevImg, const GpuMat& nextImg, GpuMat& u, GpuMat& v, Stream& stream)
|
||||
{
|
||||
cuda::pyrDown(prevPyr_[level - 1], prevPyr_[level]);
|
||||
cuda::pyrDown(nextPyr_[level - 1], nextPyr_[level]);
|
||||
CV_Assert( prevImg.type() == CV_8UC1 );
|
||||
CV_Assert( prevImg.size() == nextImg.size() && prevImg.type() == nextImg.type() );
|
||||
CV_Assert( maxLevel_ >= 0 );
|
||||
CV_Assert( winSize_.width > 2 && winSize_.height > 2 );
|
||||
|
||||
// build the image pyramids.
|
||||
|
||||
prevPyr_.resize(maxLevel_ + 1);
|
||||
nextPyr_.resize(maxLevel_ + 1);
|
||||
|
||||
prevPyr_[0] = prevImg;
|
||||
nextImg.convertTo(nextPyr_[0], CV_32F, stream);
|
||||
|
||||
for (int level = 1; level <= maxLevel_; ++level)
|
||||
{
|
||||
cuda::pyrDown(prevPyr_[level - 1], prevPyr_[level], stream);
|
||||
cuda::pyrDown(nextPyr_[level - 1], nextPyr_[level], stream);
|
||||
}
|
||||
|
||||
BufferPool pool(stream);
|
||||
|
||||
GpuMat uPyr[] = {
|
||||
pool.getBuffer(prevImg.size(), CV_32FC1),
|
||||
pool.getBuffer(prevImg.size(), CV_32FC1),
|
||||
};
|
||||
GpuMat vPyr[] = {
|
||||
pool.getBuffer(prevImg.size(), CV_32FC1),
|
||||
pool.getBuffer(prevImg.size(), CV_32FC1),
|
||||
};
|
||||
|
||||
uPyr[0].setTo(Scalar::all(0), stream);
|
||||
vPyr[0].setTo(Scalar::all(0), stream);
|
||||
uPyr[1].setTo(Scalar::all(0), stream);
|
||||
vPyr[1].setTo(Scalar::all(0), stream);
|
||||
|
||||
int2 winSize2i = make_int2(winSize_.width, winSize_.height);
|
||||
pyrlk::loadConstants(winSize2i, iters_, StreamAccessor::getStream(stream));
|
||||
|
||||
int idx = 0;
|
||||
|
||||
for (int level = maxLevel_; level >= 0; level--)
|
||||
{
|
||||
int idx2 = (idx + 1) & 1;
|
||||
|
||||
pyrlk::dense(prevPyr_[level], nextPyr_[level],
|
||||
uPyr[idx], vPyr[idx], uPyr[idx2], vPyr[idx2],
|
||||
PtrStepSzf(), winSize2i,
|
||||
StreamAccessor::getStream(stream));
|
||||
|
||||
if (level > 0)
|
||||
idx = idx2;
|
||||
}
|
||||
|
||||
uPyr[idx].copyTo(u, stream);
|
||||
vPyr[idx].copyTo(v, stream);
|
||||
}
|
||||
|
||||
ensureSizeIsEnough(prevImg.size(), CV_32FC1, uPyr_[0]);
|
||||
ensureSizeIsEnough(prevImg.size(), CV_32FC1, vPyr_[0]);
|
||||
ensureSizeIsEnough(prevImg.size(), CV_32FC1, uPyr_[1]);
|
||||
ensureSizeIsEnough(prevImg.size(), CV_32FC1, vPyr_[1]);
|
||||
uPyr_[0].setTo(Scalar::all(0));
|
||||
vPyr_[0].setTo(Scalar::all(0));
|
||||
uPyr_[1].setTo(Scalar::all(0));
|
||||
vPyr_[1].setTo(Scalar::all(0));
|
||||
|
||||
int2 winSize2i = make_int2(winSize.width, winSize.height);
|
||||
pyrlk::loadConstants(winSize2i, iters);
|
||||
|
||||
PtrStepSzf derr = err ? *err : PtrStepSzf();
|
||||
|
||||
int idx = 0;
|
||||
|
||||
for (int level = maxLevel; level >= 0; level--)
|
||||
class SparsePyrLKOpticalFlowImpl : public SparsePyrLKOpticalFlow, private PyrLKOpticalFlowBase
|
||||
{
|
||||
int idx2 = (idx + 1) & 1;
|
||||
public:
|
||||
SparsePyrLKOpticalFlowImpl(Size winSize, int maxLevel, int iters, bool useInitialFlow) :
|
||||
PyrLKOpticalFlowBase(winSize, maxLevel, iters, useInitialFlow)
|
||||
{
|
||||
}
|
||||
|
||||
pyrlk::dense(prevPyr_[level], nextPyr_[level], uPyr_[idx], vPyr_[idx], uPyr_[idx2], vPyr_[idx2],
|
||||
level == 0 ? derr : PtrStepSzf(), winSize2i);
|
||||
virtual Size getWinSize() const { return winSize_; }
|
||||
virtual void setWinSize(Size winSize) { winSize_ = winSize; }
|
||||
|
||||
if (level > 0)
|
||||
idx = idx2;
|
||||
}
|
||||
virtual int getMaxLevel() const { return maxLevel_; }
|
||||
virtual void setMaxLevel(int maxLevel) { maxLevel_ = maxLevel; }
|
||||
|
||||
uPyr_[idx].copyTo(u);
|
||||
vPyr_[idx].copyTo(v);
|
||||
virtual int getNumIters() const { return iters_; }
|
||||
virtual void setNumIters(int iters) { iters_ = iters; }
|
||||
|
||||
virtual bool getUseInitialFlow() const { return useInitialFlow_; }
|
||||
virtual void setUseInitialFlow(bool useInitialFlow) { useInitialFlow_ = useInitialFlow; }
|
||||
|
||||
virtual void calc(InputArray _prevImg, InputArray _nextImg,
|
||||
InputArray _prevPts, InputOutputArray _nextPts,
|
||||
OutputArray _status,
|
||||
OutputArray _err,
|
||||
Stream& stream)
|
||||
{
|
||||
const GpuMat prevImg = _prevImg.getGpuMat();
|
||||
const GpuMat nextImg = _nextImg.getGpuMat();
|
||||
const GpuMat prevPts = _prevPts.getGpuMat();
|
||||
GpuMat& nextPts = _nextPts.getGpuMatRef();
|
||||
GpuMat& status = _status.getGpuMatRef();
|
||||
GpuMat* err = _err.needed() ? &(_err.getGpuMatRef()) : NULL;
|
||||
|
||||
sparse(prevImg, nextImg, prevPts, nextPts, status, err, stream);
|
||||
}
|
||||
};
|
||||
|
||||
class DensePyrLKOpticalFlowImpl : public DensePyrLKOpticalFlow, private PyrLKOpticalFlowBase
|
||||
{
|
||||
public:
|
||||
DensePyrLKOpticalFlowImpl(Size winSize, int maxLevel, int iters, bool useInitialFlow) :
|
||||
PyrLKOpticalFlowBase(winSize, maxLevel, iters, useInitialFlow)
|
||||
{
|
||||
}
|
||||
|
||||
virtual Size getWinSize() const { return winSize_; }
|
||||
virtual void setWinSize(Size winSize) { winSize_ = winSize; }
|
||||
|
||||
virtual int getMaxLevel() const { return maxLevel_; }
|
||||
virtual void setMaxLevel(int maxLevel) { maxLevel_ = maxLevel; }
|
||||
|
||||
virtual int getNumIters() const { return iters_; }
|
||||
virtual void setNumIters(int iters) { iters_ = iters; }
|
||||
|
||||
virtual bool getUseInitialFlow() const { return useInitialFlow_; }
|
||||
virtual void setUseInitialFlow(bool useInitialFlow) { useInitialFlow_ = useInitialFlow; }
|
||||
|
||||
virtual void calc(InputArray _prevImg, InputArray _nextImg, InputOutputArray _flow, Stream& stream)
|
||||
{
|
||||
const GpuMat prevImg = _prevImg.getGpuMat();
|
||||
const GpuMat nextImg = _nextImg.getGpuMat();
|
||||
|
||||
BufferPool pool(stream);
|
||||
GpuMat u = pool.getBuffer(prevImg.size(), CV_32FC1);
|
||||
GpuMat v = pool.getBuffer(prevImg.size(), CV_32FC1);
|
||||
|
||||
dense(prevImg, nextImg, u, v, stream);
|
||||
|
||||
GpuMat flows[] = {u, v};
|
||||
cuda::merge(flows, 2, _flow, stream);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
void cv::cuda::PyrLKOpticalFlow::releaseMemory()
|
||||
Ptr<SparsePyrLKOpticalFlow> cv::cuda::SparsePyrLKOpticalFlow::create(Size winSize, int maxLevel, int iters, bool useInitialFlow)
|
||||
{
|
||||
prevPyr_.clear();
|
||||
nextPyr_.clear();
|
||||
return makePtr<SparsePyrLKOpticalFlowImpl>(winSize, maxLevel, iters, useInitialFlow);
|
||||
}
|
||||
|
||||
buf_.release();
|
||||
|
||||
uPyr_[0].release();
|
||||
vPyr_[0].release();
|
||||
|
||||
uPyr_[1].release();
|
||||
vPyr_[1].release();
|
||||
Ptr<DensePyrLKOpticalFlow> cv::cuda::DensePyrLKOpticalFlow::create(Size winSize, int maxLevel, int iters, bool useInitialFlow)
|
||||
{
|
||||
return makePtr<DensePyrLKOpticalFlowImpl>(winSize, maxLevel, iters, useInitialFlow);
|
||||
}
|
||||
|
||||
#endif /* !defined (HAVE_CUDA) */
|
||||
|
@ -44,256 +44,338 @@
|
||||
|
||||
#if !defined HAVE_CUDA || defined(CUDA_DISABLER)
|
||||
|
||||
cv::cuda::OpticalFlowDual_TVL1_CUDA::OpticalFlowDual_TVL1_CUDA() { throw_no_cuda(); }
|
||||
void cv::cuda::OpticalFlowDual_TVL1_CUDA::operator ()(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&) { throw_no_cuda(); }
|
||||
void cv::cuda::OpticalFlowDual_TVL1_CUDA::collectGarbage() {}
|
||||
void cv::cuda::OpticalFlowDual_TVL1_CUDA::procOneScale(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, GpuMat&) { throw_no_cuda(); }
|
||||
Ptr<OpticalFlowDual_TVL1> cv::cuda::OpticalFlowDual_TVL1::create(double, double, double, int, int, double, int, double, double, bool) { throw_no_cuda(); return Ptr<OpticalFlowDual_TVL1>(); }
|
||||
|
||||
#else
|
||||
|
||||
using namespace cv;
|
||||
using namespace cv::cuda;
|
||||
|
||||
cv::cuda::OpticalFlowDual_TVL1_CUDA::OpticalFlowDual_TVL1_CUDA()
|
||||
{
|
||||
tau = 0.25;
|
||||
lambda = 0.15;
|
||||
theta = 0.3;
|
||||
nscales = 5;
|
||||
warps = 5;
|
||||
epsilon = 0.01;
|
||||
iterations = 300;
|
||||
scaleStep = 0.8;
|
||||
gamma = 0.0;
|
||||
useInitialFlow = false;
|
||||
}
|
||||
|
||||
void cv::cuda::OpticalFlowDual_TVL1_CUDA::operator ()(const GpuMat& I0, const GpuMat& I1, GpuMat& flowx, GpuMat& flowy)
|
||||
{
|
||||
CV_Assert( I0.type() == CV_8UC1 || I0.type() == CV_32FC1 );
|
||||
CV_Assert( I0.size() == I1.size() );
|
||||
CV_Assert( I0.type() == I1.type() );
|
||||
CV_Assert( !useInitialFlow || (flowx.size() == I0.size() && flowx.type() == CV_32FC1 && flowy.size() == flowx.size() && flowy.type() == flowx.type()) );
|
||||
CV_Assert( nscales > 0 );
|
||||
|
||||
// allocate memory for the pyramid structure
|
||||
I0s.resize(nscales);
|
||||
I1s.resize(nscales);
|
||||
u1s.resize(nscales);
|
||||
u2s.resize(nscales);
|
||||
u3s.resize(nscales);
|
||||
|
||||
I0.convertTo(I0s[0], CV_32F, I0.depth() == CV_8U ? 1.0 : 255.0);
|
||||
I1.convertTo(I1s[0], CV_32F, I1.depth() == CV_8U ? 1.0 : 255.0);
|
||||
|
||||
if (!useInitialFlow)
|
||||
{
|
||||
flowx.create(I0.size(), CV_32FC1);
|
||||
flowy.create(I0.size(), CV_32FC1);
|
||||
}
|
||||
|
||||
u1s[0] = flowx;
|
||||
u2s[0] = flowy;
|
||||
if (gamma)
|
||||
u3s[0].create(I0.size(), CV_32FC1);
|
||||
|
||||
I1x_buf.create(I0.size(), CV_32FC1);
|
||||
I1y_buf.create(I0.size(), CV_32FC1);
|
||||
|
||||
I1w_buf.create(I0.size(), CV_32FC1);
|
||||
I1wx_buf.create(I0.size(), CV_32FC1);
|
||||
I1wy_buf.create(I0.size(), CV_32FC1);
|
||||
|
||||
grad_buf.create(I0.size(), CV_32FC1);
|
||||
rho_c_buf.create(I0.size(), CV_32FC1);
|
||||
|
||||
p11_buf.create(I0.size(), CV_32FC1);
|
||||
p12_buf.create(I0.size(), CV_32FC1);
|
||||
p21_buf.create(I0.size(), CV_32FC1);
|
||||
p22_buf.create(I0.size(), CV_32FC1);
|
||||
if (gamma)
|
||||
{
|
||||
p31_buf.create(I0.size(), CV_32FC1);
|
||||
p32_buf.create(I0.size(), CV_32FC1);
|
||||
}
|
||||
diff_buf.create(I0.size(), CV_32FC1);
|
||||
|
||||
// create the scales
|
||||
for (int s = 1; s < nscales; ++s)
|
||||
{
|
||||
cuda::resize(I0s[s-1], I0s[s], Size(), scaleStep, scaleStep);
|
||||
cuda::resize(I1s[s-1], I1s[s], Size(), scaleStep, scaleStep);
|
||||
|
||||
if (I0s[s].cols < 16 || I0s[s].rows < 16)
|
||||
{
|
||||
nscales = s;
|
||||
break;
|
||||
}
|
||||
|
||||
if (useInitialFlow)
|
||||
{
|
||||
cuda::resize(u1s[s-1], u1s[s], Size(), scaleStep, scaleStep);
|
||||
cuda::resize(u2s[s-1], u2s[s], Size(), scaleStep, scaleStep);
|
||||
|
||||
cuda::multiply(u1s[s], Scalar::all(scaleStep), u1s[s]);
|
||||
cuda::multiply(u2s[s], Scalar::all(scaleStep), u2s[s]);
|
||||
}
|
||||
else
|
||||
{
|
||||
u1s[s].create(I0s[s].size(), CV_32FC1);
|
||||
u2s[s].create(I0s[s].size(), CV_32FC1);
|
||||
}
|
||||
if (gamma)
|
||||
u3s[s].create(I0s[s].size(), CV_32FC1);
|
||||
}
|
||||
|
||||
if (!useInitialFlow)
|
||||
{
|
||||
u1s[nscales-1].setTo(Scalar::all(0));
|
||||
u2s[nscales-1].setTo(Scalar::all(0));
|
||||
}
|
||||
if (gamma)
|
||||
u3s[nscales - 1].setTo(Scalar::all(0));
|
||||
|
||||
// pyramidal structure for computing the optical flow
|
||||
for (int s = nscales - 1; s >= 0; --s)
|
||||
{
|
||||
// compute the optical flow at the current scale
|
||||
procOneScale(I0s[s], I1s[s], u1s[s], u2s[s], u3s[s]);
|
||||
|
||||
// if this was the last scale, finish now
|
||||
if (s == 0)
|
||||
break;
|
||||
|
||||
// otherwise, upsample the optical flow
|
||||
|
||||
// zoom the optical flow for the next finer scale
|
||||
cuda::resize(u1s[s], u1s[s - 1], I0s[s - 1].size());
|
||||
cuda::resize(u2s[s], u2s[s - 1], I0s[s - 1].size());
|
||||
if (gamma)
|
||||
cuda::resize(u3s[s], u3s[s - 1], I0s[s - 1].size());
|
||||
|
||||
// scale the optical flow with the appropriate zoom factor
|
||||
cuda::multiply(u1s[s - 1], Scalar::all(1/scaleStep), u1s[s - 1]);
|
||||
cuda::multiply(u2s[s - 1], Scalar::all(1/scaleStep), u2s[s - 1]);
|
||||
}
|
||||
}
|
||||
|
||||
namespace tvl1flow
|
||||
{
|
||||
void centeredGradient(PtrStepSzf src, PtrStepSzf dx, PtrStepSzf dy);
|
||||
void warpBackward(PtrStepSzf I0, PtrStepSzf I1, PtrStepSzf I1x, PtrStepSzf I1y, PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf I1w, PtrStepSzf I1wx, PtrStepSzf I1wy, PtrStepSzf grad, PtrStepSzf rho);
|
||||
void centeredGradient(PtrStepSzf src, PtrStepSzf dx, PtrStepSzf dy, cudaStream_t stream);
|
||||
void warpBackward(PtrStepSzf I0, PtrStepSzf I1, PtrStepSzf I1x, PtrStepSzf I1y,
|
||||
PtrStepSzf u1, PtrStepSzf u2,
|
||||
PtrStepSzf I1w, PtrStepSzf I1wx, PtrStepSzf I1wy,
|
||||
PtrStepSzf grad, PtrStepSzf rho,
|
||||
cudaStream_t stream);
|
||||
void estimateU(PtrStepSzf I1wx, PtrStepSzf I1wy,
|
||||
PtrStepSzf grad, PtrStepSzf rho_c,
|
||||
PtrStepSzf p11, PtrStepSzf p12, PtrStepSzf p21, PtrStepSzf p22, PtrStepSzf p31, PtrStepSzf p32,
|
||||
PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf u3, PtrStepSzf error,
|
||||
float l_t, float theta, float gamma, bool calcError);
|
||||
void estimateDualVariables(PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf u3, PtrStepSzf p11, PtrStepSzf p12, PtrStepSzf p21, PtrStepSzf p22, PtrStepSzf p31, PtrStepSzf p32, float taut, const float gamma);
|
||||
float l_t, float theta, float gamma, bool calcError,
|
||||
cudaStream_t stream);
|
||||
void estimateDualVariables(PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf u3,
|
||||
PtrStepSzf p11, PtrStepSzf p12, PtrStepSzf p21, PtrStepSzf p22, PtrStepSzf p31, PtrStepSzf p32,
|
||||
float taut, float gamma,
|
||||
cudaStream_t stream);
|
||||
}
|
||||
|
||||
void cv::cuda::OpticalFlowDual_TVL1_CUDA::procOneScale(const GpuMat& I0, const GpuMat& I1, GpuMat& u1, GpuMat& u2, GpuMat& u3)
|
||||
namespace
|
||||
{
|
||||
using namespace tvl1flow;
|
||||
|
||||
const double scaledEpsilon = epsilon * epsilon * I0.size().area();
|
||||
|
||||
CV_DbgAssert( I1.size() == I0.size() );
|
||||
CV_DbgAssert( I1.type() == I0.type() );
|
||||
CV_DbgAssert( u1.size() == I0.size() );
|
||||
CV_DbgAssert( u2.size() == u1.size() );
|
||||
|
||||
GpuMat I1x = I1x_buf(Rect(0, 0, I0.cols, I0.rows));
|
||||
GpuMat I1y = I1y_buf(Rect(0, 0, I0.cols, I0.rows));
|
||||
centeredGradient(I1, I1x, I1y);
|
||||
|
||||
GpuMat I1w = I1w_buf(Rect(0, 0, I0.cols, I0.rows));
|
||||
GpuMat I1wx = I1wx_buf(Rect(0, 0, I0.cols, I0.rows));
|
||||
GpuMat I1wy = I1wy_buf(Rect(0, 0, I0.cols, I0.rows));
|
||||
|
||||
GpuMat grad = grad_buf(Rect(0, 0, I0.cols, I0.rows));
|
||||
GpuMat rho_c = rho_c_buf(Rect(0, 0, I0.cols, I0.rows));
|
||||
|
||||
GpuMat p11 = p11_buf(Rect(0, 0, I0.cols, I0.rows));
|
||||
GpuMat p12 = p12_buf(Rect(0, 0, I0.cols, I0.rows));
|
||||
GpuMat p21 = p21_buf(Rect(0, 0, I0.cols, I0.rows));
|
||||
GpuMat p22 = p22_buf(Rect(0, 0, I0.cols, I0.rows));
|
||||
GpuMat p31, p32;
|
||||
if (gamma)
|
||||
class OpticalFlowDual_TVL1_Impl : public OpticalFlowDual_TVL1
|
||||
{
|
||||
p31 = p31_buf(Rect(0, 0, I0.cols, I0.rows));
|
||||
p32 = p32_buf(Rect(0, 0, I0.cols, I0.rows));
|
||||
}
|
||||
p11.setTo(Scalar::all(0));
|
||||
p12.setTo(Scalar::all(0));
|
||||
p21.setTo(Scalar::all(0));
|
||||
p22.setTo(Scalar::all(0));
|
||||
if (gamma)
|
||||
{
|
||||
p31.setTo(Scalar::all(0));
|
||||
p32.setTo(Scalar::all(0));
|
||||
}
|
||||
|
||||
GpuMat diff = diff_buf(Rect(0, 0, I0.cols, I0.rows));
|
||||
|
||||
const float l_t = static_cast<float>(lambda * theta);
|
||||
const float taut = static_cast<float>(tau / theta);
|
||||
|
||||
for (int warpings = 0; warpings < warps; ++warpings)
|
||||
{
|
||||
warpBackward(I0, I1, I1x, I1y, u1, u2, I1w, I1wx, I1wy, grad, rho_c);
|
||||
|
||||
double error = std::numeric_limits<double>::max();
|
||||
double prevError = 0.0;
|
||||
for (int n = 0; error > scaledEpsilon && n < iterations; ++n)
|
||||
public:
|
||||
OpticalFlowDual_TVL1_Impl(double tau, double lambda, double theta, int nscales, int warps, double epsilon,
|
||||
int iterations, double scaleStep, double gamma, bool useInitialFlow) :
|
||||
tau_(tau), lambda_(lambda), gamma_(gamma), theta_(theta), nscales_(nscales), warps_(warps),
|
||||
epsilon_(epsilon), iterations_(iterations), scaleStep_(scaleStep), useInitialFlow_(useInitialFlow)
|
||||
{
|
||||
// some tweaks to make sum operation less frequently
|
||||
bool calcError = (epsilon > 0) && (n & 0x1) && (prevError < scaledEpsilon);
|
||||
estimateU(I1wx, I1wy, grad, rho_c, p11, p12, p21, p22, p31, p32, u1, u2, u3, diff, l_t, static_cast<float>(theta), gamma, calcError);
|
||||
if (calcError)
|
||||
}
|
||||
|
||||
virtual double getTau() const { return tau_; }
|
||||
virtual void setTau(double tau) { tau_ = tau; }
|
||||
|
||||
virtual double getLambda() const { return lambda_; }
|
||||
virtual void setLambda(double lambda) { lambda_ = lambda; }
|
||||
|
||||
virtual double getGamma() const { return gamma_; }
|
||||
virtual void setGamma(double gamma) { gamma_ = gamma; }
|
||||
|
||||
virtual double getTheta() const { return theta_; }
|
||||
virtual void setTheta(double theta) { theta_ = theta; }
|
||||
|
||||
virtual int getNumScales() const { return nscales_; }
|
||||
virtual void setNumScales(int nscales) { nscales_ = nscales; }
|
||||
|
||||
virtual int getNumWarps() const { return warps_; }
|
||||
virtual void setNumWarps(int warps) { warps_ = warps; }
|
||||
|
||||
virtual double getEpsilon() const { return epsilon_; }
|
||||
virtual void setEpsilon(double epsilon) { epsilon_ = epsilon; }
|
||||
|
||||
virtual int getNumIterations() const { return iterations_; }
|
||||
virtual void setNumIterations(int iterations) { iterations_ = iterations; }
|
||||
|
||||
virtual double getScaleStep() const { return scaleStep_; }
|
||||
virtual void setScaleStep(double scaleStep) { scaleStep_ = scaleStep; }
|
||||
|
||||
virtual bool getUseInitialFlow() const { return useInitialFlow_; }
|
||||
virtual void setUseInitialFlow(bool useInitialFlow) { useInitialFlow_ = useInitialFlow; }
|
||||
|
||||
virtual void calc(InputArray I0, InputArray I1, InputOutputArray flow, Stream& stream);
|
||||
|
||||
private:
|
||||
double tau_;
|
||||
double lambda_;
|
||||
double gamma_;
|
||||
double theta_;
|
||||
int nscales_;
|
||||
int warps_;
|
||||
double epsilon_;
|
||||
int iterations_;
|
||||
double scaleStep_;
|
||||
bool useInitialFlow_;
|
||||
|
||||
private:
|
||||
void calcImpl(const GpuMat& I0, const GpuMat& I1, GpuMat& flowx, GpuMat& flowy, Stream& stream);
|
||||
void procOneScale(const GpuMat& I0, const GpuMat& I1, GpuMat& u1, GpuMat& u2, GpuMat& u3, Stream& stream);
|
||||
|
||||
std::vector<GpuMat> I0s;
|
||||
std::vector<GpuMat> I1s;
|
||||
std::vector<GpuMat> u1s;
|
||||
std::vector<GpuMat> u2s;
|
||||
std::vector<GpuMat> u3s;
|
||||
|
||||
GpuMat I1x_buf;
|
||||
GpuMat I1y_buf;
|
||||
|
||||
GpuMat I1w_buf;
|
||||
GpuMat I1wx_buf;
|
||||
GpuMat I1wy_buf;
|
||||
|
||||
GpuMat grad_buf;
|
||||
GpuMat rho_c_buf;
|
||||
|
||||
GpuMat p11_buf;
|
||||
GpuMat p12_buf;
|
||||
GpuMat p21_buf;
|
||||
GpuMat p22_buf;
|
||||
GpuMat p31_buf;
|
||||
GpuMat p32_buf;
|
||||
|
||||
GpuMat diff_buf;
|
||||
GpuMat norm_buf;
|
||||
};
|
||||
|
||||
void OpticalFlowDual_TVL1_Impl::calc(InputArray _frame0, InputArray _frame1, InputOutputArray _flow, Stream& stream)
|
||||
{
|
||||
const GpuMat frame0 = _frame0.getGpuMat();
|
||||
const GpuMat frame1 = _frame1.getGpuMat();
|
||||
|
||||
BufferPool pool(stream);
|
||||
GpuMat flowx = pool.getBuffer(frame0.size(), CV_32FC1);
|
||||
GpuMat flowy = pool.getBuffer(frame0.size(), CV_32FC1);
|
||||
|
||||
calcImpl(frame0, frame1, flowx, flowy, stream);
|
||||
|
||||
GpuMat flows[] = {flowx, flowy};
|
||||
cuda::merge(flows, 2, _flow, stream);
|
||||
}
|
||||
|
||||
void OpticalFlowDual_TVL1_Impl::calcImpl(const GpuMat& I0, const GpuMat& I1, GpuMat& flowx, GpuMat& flowy, Stream& stream)
|
||||
{
|
||||
CV_Assert( I0.type() == CV_8UC1 || I0.type() == CV_32FC1 );
|
||||
CV_Assert( I0.size() == I1.size() );
|
||||
CV_Assert( I0.type() == I1.type() );
|
||||
CV_Assert( !useInitialFlow_ || (flowx.size() == I0.size() && flowx.type() == CV_32FC1 && flowy.size() == flowx.size() && flowy.type() == flowx.type()) );
|
||||
CV_Assert( nscales_ > 0 );
|
||||
|
||||
// allocate memory for the pyramid structure
|
||||
I0s.resize(nscales_);
|
||||
I1s.resize(nscales_);
|
||||
u1s.resize(nscales_);
|
||||
u2s.resize(nscales_);
|
||||
u3s.resize(nscales_);
|
||||
|
||||
I0.convertTo(I0s[0], CV_32F, I0.depth() == CV_8U ? 1.0 : 255.0, stream);
|
||||
I1.convertTo(I1s[0], CV_32F, I1.depth() == CV_8U ? 1.0 : 255.0, stream);
|
||||
|
||||
if (!useInitialFlow_)
|
||||
{
|
||||
flowx.create(I0.size(), CV_32FC1);
|
||||
flowy.create(I0.size(), CV_32FC1);
|
||||
}
|
||||
|
||||
u1s[0] = flowx;
|
||||
u2s[0] = flowy;
|
||||
if (gamma_)
|
||||
{
|
||||
u3s[0].create(I0.size(), CV_32FC1);
|
||||
}
|
||||
|
||||
I1x_buf.create(I0.size(), CV_32FC1);
|
||||
I1y_buf.create(I0.size(), CV_32FC1);
|
||||
|
||||
I1w_buf.create(I0.size(), CV_32FC1);
|
||||
I1wx_buf.create(I0.size(), CV_32FC1);
|
||||
I1wy_buf.create(I0.size(), CV_32FC1);
|
||||
|
||||
grad_buf.create(I0.size(), CV_32FC1);
|
||||
rho_c_buf.create(I0.size(), CV_32FC1);
|
||||
|
||||
p11_buf.create(I0.size(), CV_32FC1);
|
||||
p12_buf.create(I0.size(), CV_32FC1);
|
||||
p21_buf.create(I0.size(), CV_32FC1);
|
||||
p22_buf.create(I0.size(), CV_32FC1);
|
||||
if (gamma_)
|
||||
{
|
||||
p31_buf.create(I0.size(), CV_32FC1);
|
||||
p32_buf.create(I0.size(), CV_32FC1);
|
||||
}
|
||||
diff_buf.create(I0.size(), CV_32FC1);
|
||||
|
||||
// create the scales
|
||||
for (int s = 1; s < nscales_; ++s)
|
||||
{
|
||||
cuda::resize(I0s[s-1], I0s[s], Size(), scaleStep_, scaleStep_, INTER_LINEAR, stream);
|
||||
cuda::resize(I1s[s-1], I1s[s], Size(), scaleStep_, scaleStep_, INTER_LINEAR, stream);
|
||||
|
||||
if (I0s[s].cols < 16 || I0s[s].rows < 16)
|
||||
{
|
||||
error = cuda::sum(diff, norm_buf)[0];
|
||||
prevError = error;
|
||||
nscales_ = s;
|
||||
break;
|
||||
}
|
||||
|
||||
if (useInitialFlow_)
|
||||
{
|
||||
cuda::resize(u1s[s-1], u1s[s], Size(), scaleStep_, scaleStep_, INTER_LINEAR, stream);
|
||||
cuda::resize(u2s[s-1], u2s[s], Size(), scaleStep_, scaleStep_, INTER_LINEAR, stream);
|
||||
|
||||
cuda::multiply(u1s[s], Scalar::all(scaleStep_), u1s[s], 1, -1, stream);
|
||||
cuda::multiply(u2s[s], Scalar::all(scaleStep_), u2s[s], 1, -1, stream);
|
||||
}
|
||||
else
|
||||
{
|
||||
error = std::numeric_limits<double>::max();
|
||||
prevError -= scaledEpsilon;
|
||||
u1s[s].create(I0s[s].size(), CV_32FC1);
|
||||
u2s[s].create(I0s[s].size(), CV_32FC1);
|
||||
}
|
||||
if (gamma_)
|
||||
{
|
||||
u3s[s].create(I0s[s].size(), CV_32FC1);
|
||||
}
|
||||
}
|
||||
|
||||
if (!useInitialFlow_)
|
||||
{
|
||||
u1s[nscales_-1].setTo(Scalar::all(0), stream);
|
||||
u2s[nscales_-1].setTo(Scalar::all(0), stream);
|
||||
}
|
||||
if (gamma_)
|
||||
{
|
||||
u3s[nscales_ - 1].setTo(Scalar::all(0), stream);
|
||||
}
|
||||
|
||||
// pyramidal structure for computing the optical flow
|
||||
for (int s = nscales_ - 1; s >= 0; --s)
|
||||
{
|
||||
// compute the optical flow at the current scale
|
||||
procOneScale(I0s[s], I1s[s], u1s[s], u2s[s], u3s[s], stream);
|
||||
|
||||
// if this was the last scale, finish now
|
||||
if (s == 0)
|
||||
break;
|
||||
|
||||
// otherwise, upsample the optical flow
|
||||
|
||||
// zoom the optical flow for the next finer scale
|
||||
cuda::resize(u1s[s], u1s[s - 1], I0s[s - 1].size(), 0, 0, INTER_LINEAR, stream);
|
||||
cuda::resize(u2s[s], u2s[s - 1], I0s[s - 1].size(), 0, 0, INTER_LINEAR, stream);
|
||||
if (gamma_)
|
||||
{
|
||||
cuda::resize(u3s[s], u3s[s - 1], I0s[s - 1].size(), 0, 0, INTER_LINEAR, stream);
|
||||
}
|
||||
|
||||
estimateDualVariables(u1, u2, u3, p11, p12, p21, p22, p31, p32, taut, gamma);
|
||||
// scale the optical flow with the appropriate zoom factor
|
||||
cuda::multiply(u1s[s - 1], Scalar::all(1/scaleStep_), u1s[s - 1], 1, -1, stream);
|
||||
cuda::multiply(u2s[s - 1], Scalar::all(1/scaleStep_), u2s[s - 1], 1, -1, stream);
|
||||
}
|
||||
}
|
||||
|
||||
void OpticalFlowDual_TVL1_Impl::procOneScale(const GpuMat& I0, const GpuMat& I1, GpuMat& u1, GpuMat& u2, GpuMat& u3, Stream& _stream)
|
||||
{
|
||||
using namespace tvl1flow;
|
||||
|
||||
cudaStream_t stream = StreamAccessor::getStream(_stream);
|
||||
|
||||
const double scaledEpsilon = epsilon_ * epsilon_ * I0.size().area();
|
||||
|
||||
CV_DbgAssert( I1.size() == I0.size() );
|
||||
CV_DbgAssert( I1.type() == I0.type() );
|
||||
CV_DbgAssert( u1.size() == I0.size() );
|
||||
CV_DbgAssert( u2.size() == u1.size() );
|
||||
|
||||
GpuMat I1x = I1x_buf(Rect(0, 0, I0.cols, I0.rows));
|
||||
GpuMat I1y = I1y_buf(Rect(0, 0, I0.cols, I0.rows));
|
||||
centeredGradient(I1, I1x, I1y, stream);
|
||||
|
||||
GpuMat I1w = I1w_buf(Rect(0, 0, I0.cols, I0.rows));
|
||||
GpuMat I1wx = I1wx_buf(Rect(0, 0, I0.cols, I0.rows));
|
||||
GpuMat I1wy = I1wy_buf(Rect(0, 0, I0.cols, I0.rows));
|
||||
|
||||
GpuMat grad = grad_buf(Rect(0, 0, I0.cols, I0.rows));
|
||||
GpuMat rho_c = rho_c_buf(Rect(0, 0, I0.cols, I0.rows));
|
||||
|
||||
GpuMat p11 = p11_buf(Rect(0, 0, I0.cols, I0.rows));
|
||||
GpuMat p12 = p12_buf(Rect(0, 0, I0.cols, I0.rows));
|
||||
GpuMat p21 = p21_buf(Rect(0, 0, I0.cols, I0.rows));
|
||||
GpuMat p22 = p22_buf(Rect(0, 0, I0.cols, I0.rows));
|
||||
GpuMat p31, p32;
|
||||
if (gamma_)
|
||||
{
|
||||
p31 = p31_buf(Rect(0, 0, I0.cols, I0.rows));
|
||||
p32 = p32_buf(Rect(0, 0, I0.cols, I0.rows));
|
||||
}
|
||||
p11.setTo(Scalar::all(0), _stream);
|
||||
p12.setTo(Scalar::all(0), _stream);
|
||||
p21.setTo(Scalar::all(0), _stream);
|
||||
p22.setTo(Scalar::all(0), _stream);
|
||||
if (gamma_)
|
||||
{
|
||||
p31.setTo(Scalar::all(0), _stream);
|
||||
p32.setTo(Scalar::all(0), _stream);
|
||||
}
|
||||
|
||||
GpuMat diff = diff_buf(Rect(0, 0, I0.cols, I0.rows));
|
||||
|
||||
const float l_t = static_cast<float>(lambda_ * theta_);
|
||||
const float taut = static_cast<float>(tau_ / theta_);
|
||||
|
||||
for (int warpings = 0; warpings < warps_; ++warpings)
|
||||
{
|
||||
warpBackward(I0, I1, I1x, I1y, u1, u2, I1w, I1wx, I1wy, grad, rho_c, stream);
|
||||
|
||||
double error = std::numeric_limits<double>::max();
|
||||
double prevError = 0.0;
|
||||
for (int n = 0; error > scaledEpsilon && n < iterations_; ++n)
|
||||
{
|
||||
// some tweaks to make sum operation less frequently
|
||||
bool calcError = (epsilon_ > 0) && (n & 0x1) && (prevError < scaledEpsilon);
|
||||
estimateU(I1wx, I1wy, grad, rho_c, p11, p12, p21, p22, p31, p32, u1, u2, u3, diff, l_t, static_cast<float>(theta_), gamma_, calcError, stream);
|
||||
if (calcError)
|
||||
{
|
||||
_stream.waitForCompletion();
|
||||
error = cuda::sum(diff, norm_buf)[0];
|
||||
prevError = error;
|
||||
}
|
||||
else
|
||||
{
|
||||
error = std::numeric_limits<double>::max();
|
||||
prevError -= scaledEpsilon;
|
||||
}
|
||||
|
||||
estimateDualVariables(u1, u2, u3, p11, p12, p21, p22, p31, p32, taut, gamma_, stream);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void cv::cuda::OpticalFlowDual_TVL1_CUDA::collectGarbage()
|
||||
Ptr<OpticalFlowDual_TVL1> cv::cuda::OpticalFlowDual_TVL1::create(
|
||||
double tau, double lambda, double theta, int nscales, int warps,
|
||||
double epsilon, int iterations, double scaleStep, double gamma, bool useInitialFlow)
|
||||
{
|
||||
I0s.clear();
|
||||
I1s.clear();
|
||||
u1s.clear();
|
||||
u2s.clear();
|
||||
u3s.clear();
|
||||
|
||||
I1x_buf.release();
|
||||
I1y_buf.release();
|
||||
|
||||
I1w_buf.release();
|
||||
I1wx_buf.release();
|
||||
I1wy_buf.release();
|
||||
|
||||
grad_buf.release();
|
||||
rho_c_buf.release();
|
||||
|
||||
p11_buf.release();
|
||||
p12_buf.release();
|
||||
p21_buf.release();
|
||||
p22_buf.release();
|
||||
if (gamma)
|
||||
{
|
||||
p31_buf.release();
|
||||
p32_buf.release();
|
||||
}
|
||||
diff_buf.release();
|
||||
norm_buf.release();
|
||||
return makePtr<OpticalFlowDual_TVL1_Impl>(tau, lambda, theta, nscales, warps,
|
||||
epsilon, iterations, scaleStep, gamma, useInitialFlow);
|
||||
}
|
||||
|
||||
#endif // !defined HAVE_CUDA || defined(CUDA_DISABLER)
|
||||
|
Loading…
Reference in New Issue
Block a user