Merge pull request #3635 from jet47:cuda-optflow-refactoring

This commit is contained in:
Vadim Pisarevsky 2015-01-22 09:45:19 +00:00
commit 3f1fb281be
27 changed files with 1577 additions and 1858 deletions

View File

@ -71,8 +71,9 @@ public:
CV_EXPORTS Ptr<ImagePyramid> createImagePyramid(InputArray img, int nLayers = -1, Stream& stream = Stream::Null());
////////////////////////////////////////////////////
//
// GMG
//
/** @brief Background/Foreground Segmentation Algorithm.
@ -125,8 +126,9 @@ public:
CV_EXPORTS Ptr<cuda::BackgroundSubtractorGMG>
createBackgroundSubtractorGMG(int initializationFrames = 120, double decisionThreshold = 0.8);
////////////////////////////////////////////////////
//
// FGD
//
/** @brief The class discriminates between foreground and background pixels by building and maintaining a model
of the background.
@ -180,6 +182,51 @@ struct CV_EXPORTS FGDParams
CV_EXPORTS Ptr<cuda::BackgroundSubtractorFGD>
createBackgroundSubtractorFGD(const FGDParams& params = FGDParams());
//
// Optical flow
//
//! Calculates optical flow for 2 images using the block matching algorithm.
//! prev / curr are the two input images; velx / vely and buf are passed as
//! non-const GpuMat references (presumably flow outputs and scratch storage -- TODO confirm).
CV_EXPORTS void calcOpticalFlowBM(const GpuMat& prev, const GpuMat& curr,
Size block_size, Size shift_size, Size max_range, bool use_previous,
GpuMat& velx, GpuMat& vely, GpuMat& buf,
Stream& stream = Stream::Null());
/** @brief Callable object computing optical flow between two images (fast block matching, per the class name).

I0 and I1 are the input images; flowx and flowy are passed as non-const references.
search_window defaults to 21 and block_window to 7. The three GpuMat members are
private storage owned by the object.
*/
class CV_EXPORTS FastOpticalFlowBM
{
public:
    void operator ()(const GpuMat& I0, const GpuMat& I1,
                     GpuMat& flowx, GpuMat& flowy,
                     int search_window = 21, int block_window = 7,
                     Stream& s = Stream::Null());

private:
    GpuMat buffer, extended_I0, extended_I1;
};
/** @brief Interpolates frames (images) using the provided optical flow (displacement field).
@param frame0 First frame (32-bit floating-point image, single channel).
@param frame1 Second frame. Must have the same type and size as frame0.
@param fu Forward horizontal displacement.
@param fv Forward vertical displacement.
@param bu Backward horizontal displacement.
@param bv Backward vertical displacement.
@param pos New frame position.
@param newFrame Output image.
@param buf Temporary buffer. It will have size width x 6\*height and type CV_32FC1, and will
contain 6 GpuMat: occlusion mask for the first frame, occlusion mask for the second frame,
interpolated forward horizontal flow, interpolated forward vertical flow, interpolated backward
horizontal flow, interpolated backward vertical flow.
@param stream Stream for the asynchronous version.
*/
CV_EXPORTS void interpolateFrames(const GpuMat& frame0, const GpuMat& frame1,
const GpuMat& fu, const GpuMat& fv,
const GpuMat& bu, const GpuMat& bv,
float pos, GpuMat& newFrame, GpuMat& buf,
Stream& stream = Stream::Null());
//! Takes the flow components u and v and fills the vertex and colors matrices.
//! NOTE(review): presumably builds needle-map geometry for visualizing the flow; the layout of
//! vertex / colors is not visible in this header -- confirm against the implementation.
CV_EXPORTS void createOpticalFlowNeedleMap(const GpuMat& u, const GpuMat& v, GpuMat& vertex, GpuMat& colors);
//! @}
}}

View File

@ -61,49 +61,94 @@ namespace cv { namespace cuda {
//! @addtogroup cudaoptflow
//! @{
/** @brief Class computing the optical flow for two images using Brox et al Optical Flow algorithm
(@cite Brox2004). :
//
// Interface
//
/** @brief Base interface for dense optical flow algorithms.
*/
class CV_EXPORTS BroxOpticalFlow
class CV_EXPORTS DenseOpticalFlow : public Algorithm
{
public:
BroxOpticalFlow(float alpha_, float gamma_, float scale_factor_, int inner_iterations_, int outer_iterations_, int solver_iterations_) :
alpha(alpha_), gamma(gamma_), scale_factor(scale_factor_),
inner_iterations(inner_iterations_), outer_iterations(outer_iterations_), solver_iterations(solver_iterations_)
{
}
/** @brief Calculates a dense optical flow.
//! Compute optical flow
//! frame0 - source frame (supports only CV_32FC1 type)
//! frame1 - frame to track (with the same size and type as frame0)
//! u - flow horizontal component (along x axis)
//! v - flow vertical component (along y axis)
void operator ()(const GpuMat& frame0, const GpuMat& frame1, GpuMat& u, GpuMat& v, Stream& stream = Stream::Null());
//! flow smoothness
float alpha;
//! gradient constancy importance
float gamma;
//! pyramid scale factor
float scale_factor;
//! number of lagged non-linearity iterations (inner loop)
int inner_iterations;
//! number of warping iterations (number of pyramid levels)
int outer_iterations;
//! number of linear system solver iterations
int solver_iterations;
GpuMat buf;
@param I0 first input image.
@param I1 second input image of the same size and the same type as I0.
@param flow computed flow image that has the same size as I0 and type CV_32FC2.
@param stream Stream for the asynchronous version.
*/
virtual void calc(InputArray I0, InputArray I1, InputOutputArray flow, Stream& stream = Stream::Null()) = 0;
};
/** @brief Class used for calculating an optical flow.
/** @brief Base interface for sparse optical flow algorithms.

Concrete algorithms implement the pure virtual calc() method.
*/
class CV_EXPORTS SparseOpticalFlow : public Algorithm
{
public:
/** @brief Calculates a sparse optical flow.

@param prevImg First input image.
@param nextImg Second input image of the same size and the same type as prevImg.
@param prevPts Vector of 2D points for which the flow needs to be found.
@param nextPts Output vector of 2D points containing the calculated new positions of input features in the second image.
@param status Output status vector. Each element of the vector is set to 1 if the
flow for the corresponding features has been found. Otherwise, it is set to 0.
@param err Optional output vector that contains error response for each point (inverse confidence).
@param stream Stream for the asynchronous version.
*/
virtual void calc(InputArray prevImg, InputArray nextImg,
InputArray prevPts, InputOutputArray nextPts,
OutputArray status,
OutputArray err = cv::noArray(),
Stream& stream = Stream::Null()) = 0;
};
//
// BroxOpticalFlow
//
/** @brief Class computing the optical flow for two images using Brox et al Optical Flow algorithm (@cite Brox2004).

Abstract interface: parameters are exposed through the getter/setter pairs below and
instances are obtained from the static create() factory.
*/
class CV_EXPORTS BroxOpticalFlow : public DenseOpticalFlow
{
public:
//! flow smoothness (alpha)
virtual double getFlowSmoothness() const = 0;
virtual void setFlowSmoothness(double alpha) = 0;
//! gradient constancy importance (gamma)
virtual double getGradientConstancyImportance() const = 0;
virtual void setGradientConstancyImportance(double gamma) = 0;
//! pyramid scale factor
virtual double getPyramidScaleFactor() const = 0;
virtual void setPyramidScaleFactor(double scale_factor) = 0;
//! number of lagged non-linearity iterations (inner loop)
virtual int getInnerIterations() const = 0;
virtual void setInnerIterations(int inner_iterations) = 0;
//! number of warping iterations (number of pyramid levels)
virtual int getOuterIterations() const = 0;
virtual void setOuterIterations(int outer_iterations) = 0;
//! number of linear system solver iterations
virtual int getSolverIterations() const = 0;
virtual void setSolverIterations(int solver_iterations) = 0;
//! Factory taking the six parameters above; every argument has a default value.
static Ptr<BroxOpticalFlow> create(
double alpha = 0.197,
double gamma = 50.0,
double scale_factor = 0.8,
int inner_iterations = 5,
int outer_iterations = 150,
int solver_iterations = 10);
};
//
// PyrLKOpticalFlow
//
/** @brief Class used for calculating a sparse optical flow.
The class can calculate an optical flow for a sparse feature set using the
iterative Lucas-Kanade method with pyramids.
@sa calcOpticalFlowPyrLK
@ -112,158 +157,116 @@ iterative Lucas-Kanade method with pyramids.
- An example of the Lucas Kanade optical flow algorithm can be found at
opencv_source_code/samples/gpu/pyrlk_optical_flow.cpp
*/
class CV_EXPORTS PyrLKOpticalFlow
class CV_EXPORTS SparsePyrLKOpticalFlow : public SparseOpticalFlow
{
public:
PyrLKOpticalFlow();
virtual Size getWinSize() const = 0;
virtual void setWinSize(Size winSize) = 0;
/** @brief Calculate an optical flow for a sparse feature set.
virtual int getMaxLevel() const = 0;
virtual void setMaxLevel(int maxLevel) = 0;
@param prevImg First 8-bit input image (supports both grayscale and color images).
@param nextImg Second input image of the same size and the same type as prevImg .
@param prevPts Vector of 2D points for which the flow needs to be found. It must be one row matrix
with CV_32FC2 type.
@param nextPts Output vector of 2D points (with single-precision floating-point coordinates)
containing the calculated new positions of input features in the second image. When useInitialFlow
is true, the vector must have the same size as in the input.
@param status Output status vector (CV_8UC1 type). Each element of the vector is set to 1 if the
flow for the corresponding features has been found. Otherwise, it is set to 0.
@param err Output vector (CV_32FC1 type) that contains the difference between patches around the
original and moved points or min eigen value if getMinEigenVals is checked. It can be NULL, if not
needed.
virtual int getNumIters() const = 0;
virtual void setNumIters(int iters) = 0;
@sa calcOpticalFlowPyrLK
*/
void sparse(const GpuMat& prevImg, const GpuMat& nextImg, const GpuMat& prevPts, GpuMat& nextPts,
GpuMat& status, GpuMat* err = 0);
virtual bool getUseInitialFlow() const = 0;
virtual void setUseInitialFlow(bool useInitialFlow) = 0;
/** @brief Calculate dense optical flow.
@param prevImg First 8-bit grayscale input image.
@param nextImg Second input image of the same size and the same type as prevImg .
@param u Horizontal component of the optical flow of the same size as input images, 32-bit
floating-point, single-channel
@param v Vertical component of the optical flow of the same size as input images, 32-bit
floating-point, single-channel
@param err Output vector (CV_32FC1 type) that contains the difference between patches around the
original and moved points or min eigen value if getMinEigenVals is checked. It can be NULL, if not
needed.
*/
void dense(const GpuMat& prevImg, const GpuMat& nextImg, GpuMat& u, GpuMat& v, GpuMat* err = 0);
/** @brief Releases inner buffers memory.
*/
void releaseMemory();
Size winSize;
int maxLevel;
int iters;
bool useInitialFlow;
private:
std::vector<GpuMat> prevPyr_;
std::vector<GpuMat> nextPyr_;
GpuMat buf_;
GpuMat uPyr_[2];
GpuMat vPyr_[2];
static Ptr<SparsePyrLKOpticalFlow> create(
Size winSize = Size(21, 21),
int maxLevel = 3,
int iters = 30,
bool useInitialFlow = false);
};
/** @brief Class computing a dense optical flow using the Gunnar Farnebacks algorithm. :
/** @brief Class used for calculating a dense optical flow.
The class can calculate a dense optical flow using the
iterative Lucas-Kanade method with pyramids.
*/
class CV_EXPORTS FarnebackOpticalFlow
class CV_EXPORTS DensePyrLKOpticalFlow : public DenseOpticalFlow
{
public:
FarnebackOpticalFlow()
{
numLevels = 5;
pyrScale = 0.5;
fastPyramids = false;
winSize = 13;
numIters = 10;
polyN = 5;
polySigma = 1.1;
flags = 0;
}
virtual Size getWinSize() const = 0;
virtual void setWinSize(Size winSize) = 0;
int numLevels;
double pyrScale;
bool fastPyramids;
int winSize;
int numIters;
int polyN;
double polySigma;
int flags;
virtual int getMaxLevel() const = 0;
virtual void setMaxLevel(int maxLevel) = 0;
/** @brief Computes a dense optical flow using the Gunnar Farnebacks algorithm.
virtual int getNumIters() const = 0;
virtual void setNumIters(int iters) = 0;
@param frame0 First 8-bit gray-scale input image
@param frame1 Second 8-bit gray-scale input image
@param flowx Flow horizontal component
@param flowy Flow vertical component
@param s Stream
virtual bool getUseInitialFlow() const = 0;
virtual void setUseInitialFlow(bool useInitialFlow) = 0;
@sa calcOpticalFlowFarneback
*/
void operator ()(const GpuMat &frame0, const GpuMat &frame1, GpuMat &flowx, GpuMat &flowy, Stream &s = Stream::Null());
/** @brief Releases unused auxiliary memory buffers.
*/
void releaseMemory()
{
frames_[0].release();
frames_[1].release();
pyrLevel_[0].release();
pyrLevel_[1].release();
M_.release();
bufM_.release();
R_[0].release();
R_[1].release();
blurredFrame_[0].release();
blurredFrame_[1].release();
pyramid0_.clear();
pyramid1_.clear();
}
private:
void prepareGaussian(
int n, double sigma, float *g, float *xg, float *xxg,
double &ig11, double &ig03, double &ig33, double &ig55);
void setPolynomialExpansionConsts(int n, double sigma);
void updateFlow_boxFilter(
const GpuMat& R0, const GpuMat& R1, GpuMat& flowx, GpuMat &flowy,
GpuMat& M, GpuMat &bufM, int blockSize, bool updateMatrices, Stream streams[]);
void updateFlow_gaussianBlur(
const GpuMat& R0, const GpuMat& R1, GpuMat& flowx, GpuMat& flowy,
GpuMat& M, GpuMat &bufM, int blockSize, bool updateMatrices, Stream streams[]);
GpuMat frames_[2];
GpuMat pyrLevel_[2], M_, bufM_, R_[2], blurredFrame_[2];
std::vector<GpuMat> pyramid0_, pyramid1_;
static Ptr<DensePyrLKOpticalFlow> create(
Size winSize = Size(13, 13),
int maxLevel = 3,
int iters = 30,
bool useInitialFlow = false);
};
// Implementation of the Zach, Pock and Bischof Dual TV-L1 Optical Flow method
//
// see reference:
// [1] C. Zach, T. Pock and H. Bischof, "A Duality Based Approach for Realtime TV-L1 Optical Flow".
// [2] Javier Sanchez, Enric Meinhardt-Llopis and Gabriele Facciolo. "TV-L1 Optical Flow Estimation".
class CV_EXPORTS OpticalFlowDual_TVL1_CUDA
// FarnebackOpticalFlow
//
/** @brief Class computing a dense optical flow using Gunnar Farneback's algorithm.
*/
class CV_EXPORTS FarnebackOpticalFlow : public DenseOpticalFlow
{
public:
OpticalFlowDual_TVL1_CUDA();
virtual int getNumLevels() const = 0;
virtual void setNumLevels(int numLevels) = 0;
void operator ()(const GpuMat& I0, const GpuMat& I1, GpuMat& flowx, GpuMat& flowy);
virtual double getPyrScale() const = 0;
virtual void setPyrScale(double pyrScale) = 0;
void collectGarbage();
virtual bool getFastPyramids() const = 0;
virtual void setFastPyramids(bool fastPyramids) = 0;
virtual int getWinSize() const = 0;
virtual void setWinSize(int winSize) = 0;
virtual int getNumIters() const = 0;
virtual void setNumIters(int numIters) = 0;
virtual int getPolyN() const = 0;
virtual void setPolyN(int polyN) = 0;
virtual double getPolySigma() const = 0;
virtual void setPolySigma(double polySigma) = 0;
virtual int getFlags() const = 0;
virtual void setFlags(int flags) = 0;
static Ptr<FarnebackOpticalFlow> create(
int numLevels = 5,
double pyrScale = 0.5,
bool fastPyramids = false,
int winSize = 13,
int numIters = 10,
int polyN = 5,
double polySigma = 1.1,
int flags = 0);
};
//
// OpticalFlowDual_TVL1
//
/** @brief Implementation of the Zach, Pock and Bischof Dual TV-L1 Optical Flow method.
*
* @sa C. Zach, T. Pock and H. Bischof, "A Duality Based Approach for Realtime TV-L1 Optical Flow".
* @sa Javier Sanchez, Enric Meinhardt-Llopis and Gabriele Facciolo. "TV-L1 Optical Flow Estimation".
*/
class CV_EXPORTS OpticalFlowDual_TVL1 : public DenseOpticalFlow
{
public:
/**
* Time step of the numerical scheme.
*/
double tau;
virtual double getTau() const = 0;
virtual void setTau(double tau) = 0;
/**
* Weight parameter for the data term, attachment parameter.
@ -271,7 +274,8 @@ public:
* The smaller this parameter is, the smoother the solutions we obtain.
* It depends on the range of motions of the images, so its value should be adapted to each image sequence.
*/
double lambda;
virtual double getLambda() const = 0;
virtual void setLambda(double lambda) = 0;
/**
* Weight parameter for (u - v)^2, tightness parameter.
@ -279,20 +283,23 @@ public:
* In theory, it should have a small value in order to maintain both parts in correspondence.
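* The method is stable for a large range of values of this parameter.
* NOTE(review): this text ("tightness parameter") reads like the description of theta, yet it sits
* above getGamma/setGamma, while the illumination-variation text sits above getTheta/setTheta.
* The two descriptions may be swapped -- confirm against the TV-L1 references.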
*/
virtual double getGamma() const = 0;
virtual void setGamma(double gamma) = 0;
double gamma;
/**
* parameter used for motion estimation. It adds a variable allowing for illumination variations
* Set this parameter to 1. if you have varying illumination.
* See: Chambolle et al, A First-Order Primal-Dual Algorithm for Convex Problems with Applications to Imaging
* Journal of Mathematical imaging and vision, may 2011 Vol 40 issue 1, pp 120-145
*/
double theta;
* parameter used for motion estimation. It adds a variable allowing for illumination variations
* Set this parameter to 1. if you have varying illumination.
* See: Chambolle et al, A First-Order Primal-Dual Algorithm for Convex Problems with Applications to Imaging
* Journal of Mathematical imaging and vision, may 2011 Vol 40 issue 1, pp 120-145
*/
virtual double getTheta() const = 0;
virtual void setTheta(double theta) = 0;
/**
* Number of scales used to create the pyramid of images.
*/
int nscales;
virtual int getNumScales() const = 0;
virtual void setNumScales(int nscales) = 0;
/**
* Number of warpings per scale.
@ -300,94 +307,41 @@ public:
* This is a parameter that assures the stability of the method.
* It also affects the running time, so it is a compromise between speed and accuracy.
*/
int warps;
virtual int getNumWarps() const = 0;
virtual void setNumWarps(int warps) = 0;
/**
* Stopping criterion threshold used in the numerical scheme, which is a trade-off between precision and running time.
* A small value will yield more accurate solutions at the expense of a slower convergence.
*/
double epsilon;
virtual double getEpsilon() const = 0;
virtual void setEpsilon(double epsilon) = 0;
/**
* Stopping criterion iterations number used in the numerical scheme.
*/
int iterations;
virtual int getNumIterations() const = 0;
virtual void setNumIterations(int iterations) = 0;
double scaleStep;
virtual double getScaleStep() const = 0;
virtual void setScaleStep(double scaleStep) = 0;
bool useInitialFlow;
virtual bool getUseInitialFlow() const = 0;
virtual void setUseInitialFlow(bool useInitialFlow) = 0;
private:
void procOneScale(const GpuMat& I0, const GpuMat& I1, GpuMat& u1, GpuMat& u2, GpuMat& u3);
std::vector<GpuMat> I0s;
std::vector<GpuMat> I1s;
std::vector<GpuMat> u1s;
std::vector<GpuMat> u2s;
std::vector<GpuMat> u3s;
GpuMat I1x_buf;
GpuMat I1y_buf;
GpuMat I1w_buf;
GpuMat I1wx_buf;
GpuMat I1wy_buf;
GpuMat grad_buf;
GpuMat rho_c_buf;
GpuMat p11_buf;
GpuMat p12_buf;
GpuMat p21_buf;
GpuMat p22_buf;
GpuMat p31_buf;
GpuMat p32_buf;
GpuMat diff_buf;
GpuMat norm_buf;
static Ptr<OpticalFlowDual_TVL1> create(
double tau = 0.25,
double lambda = 0.15,
double theta = 0.3,
int nscales = 5,
int warps = 5,
double epsilon = 0.01,
int iterations = 300,
double scaleStep = 0.8,
double gamma = 0.0,
bool useInitialFlow = false);
};
//! Calculates optical flow for 2 images using the block matching algorithm.
//! prev / curr are the two input images; velx / vely and buf are passed as
//! non-const GpuMat references (presumably flow outputs and scratch storage -- TODO confirm).
CV_EXPORTS void calcOpticalFlowBM(const GpuMat& prev, const GpuMat& curr,
Size block_size, Size shift_size, Size max_range, bool use_previous,
GpuMat& velx, GpuMat& vely, GpuMat& buf,
Stream& stream = Stream::Null());
/** @brief Callable object computing optical flow between two images (fast block matching, per the class name).

I0 and I1 are the input images; flowx and flowy are passed as non-const references.
search_window defaults to 21 and block_window to 7. The three GpuMat members are
private storage owned by the object.
*/
class CV_EXPORTS FastOpticalFlowBM
{
public:
    void operator ()(const GpuMat& I0, const GpuMat& I1,
                     GpuMat& flowx, GpuMat& flowy,
                     int search_window = 21, int block_window = 7,
                     Stream& s = Stream::Null());

private:
    GpuMat buffer, extended_I0, extended_I1;
};
/** @brief Interpolates frames (images) using the provided optical flow (displacement field).
@param frame0 First frame (32-bit floating-point image, single channel).
@param frame1 Second frame. Must have the same type and size as frame0.
@param fu Forward horizontal displacement.
@param fv Forward vertical displacement.
@param bu Backward horizontal displacement.
@param bv Backward vertical displacement.
@param pos New frame position.
@param newFrame Output image.
@param buf Temporary buffer. It will have size width x 6\*height and type CV_32FC1, and will
contain 6 GpuMat: occlusion mask for the first frame, occlusion mask for the second frame,
interpolated forward horizontal flow, interpolated forward vertical flow, interpolated backward
horizontal flow, interpolated backward vertical flow.
@param stream Stream for the asynchronous version.
*/
CV_EXPORTS void interpolateFrames(const GpuMat& frame0, const GpuMat& frame1,
const GpuMat& fu, const GpuMat& fv,
const GpuMat& bu, const GpuMat& bv,
float pos, GpuMat& newFrame, GpuMat& buf,
Stream& stream = Stream::Null());
//! Takes the flow components u and v and fills the vertex and colors matrices.
//! NOTE(review): presumably builds needle-map geometry for visualizing the flow; the layout of
//! vertex / colors is not visible in this header -- confirm against the implementation.
CV_EXPORTS void createOpticalFlowNeedleMap(const GpuMat& u, const GpuMat& v, GpuMat& vertex, GpuMat& colors);
//! @}
}} // namespace cv { namespace cuda {

View File

@ -46,91 +46,10 @@ using namespace std;
using namespace testing;
using namespace perf;
//////////////////////////////////////////////////////
// InterpolateFrames
typedef pair<string, string> pair_string;
DEF_PARAM_TEST_1(ImagePair, pair_string);
// Perf test for cv::cuda::interpolateFrames on a single grayscale image pair.
// Only a CUDA branch exists; the non-CUDA branch calls FAIL_NO_CPU().
PERF_TEST_P(ImagePair, InterpolateFrames,
Values<pair_string>(make_pair("gpu/opticalflow/frame0.png", "gpu/opticalflow/frame1.png")))
{
cv::Mat frame0 = readImage(GetParam().first, cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(frame0.empty());
cv::Mat frame1 = readImage(GetParam().second, cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(frame1.empty());
// Convert both frames in place to CV_32FC1, scaling values by 1/255.
frame0.convertTo(frame0, CV_32FC1, 1.0 / 255.0);
frame1.convertTo(frame1, CV_32FC1, 1.0 / 255.0);
if (PERF_RUN_CUDA())
{
const cv::cuda::GpuMat d_frame0(frame0);
const cv::cuda::GpuMat d_frame1(frame1);
cv::cuda::GpuMat d_fu, d_fv;
cv::cuda::GpuMat d_bu, d_bv;
cv::cuda::BroxOpticalFlow d_flow(0.197f /*alpha*/, 50.0f /*gamma*/, 0.8f /*scale_factor*/,
10 /*inner_iterations*/, 77 /*outer_iterations*/, 10 /*solver_iterations*/);
// Forward (frame0 -> frame1) and backward (frame1 -> frame0) flow are computed
// once here, outside TEST_CYCLE(), as inputs for the interpolation.
d_flow(d_frame0, d_frame1, d_fu, d_fv);
d_flow(d_frame1, d_frame0, d_bu, d_bv);
cv::cuda::GpuMat newFrame;
cv::cuda::GpuMat d_buf;
// Only the interpolateFrames call is wrapped in TEST_CYCLE(); pos is 0.5f.
TEST_CYCLE() cv::cuda::interpolateFrames(d_frame0, d_frame1, d_fu, d_fv, d_bu, d_bv, 0.5f, newFrame, d_buf);
CUDA_SANITY_CHECK(newFrame, 1e-4);
}
else
{
FAIL_NO_CPU();
}
}
//////////////////////////////////////////////////////
// CreateOpticalFlowNeedleMap
// Perf test for cv::cuda::createOpticalFlowNeedleMap on a single grayscale image pair.
// Only a CUDA branch exists; the non-CUDA branch calls FAIL_NO_CPU().
PERF_TEST_P(ImagePair, CreateOpticalFlowNeedleMap,
Values<pair_string>(make_pair("gpu/opticalflow/frame0.png", "gpu/opticalflow/frame1.png")))
{
cv::Mat frame0 = readImage(GetParam().first, cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(frame0.empty());
cv::Mat frame1 = readImage(GetParam().second, cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(frame1.empty());
// Convert both frames in place to CV_32FC1, scaling values by 1/255.
frame0.convertTo(frame0, CV_32FC1, 1.0 / 255.0);
frame1.convertTo(frame1, CV_32FC1, 1.0 / 255.0);
if (PERF_RUN_CUDA())
{
const cv::cuda::GpuMat d_frame0(frame0);
const cv::cuda::GpuMat d_frame1(frame1);
cv::cuda::GpuMat u;
cv::cuda::GpuMat v;
cv::cuda::BroxOpticalFlow d_flow(0.197f /*alpha*/, 50.0f /*gamma*/, 0.8f /*scale_factor*/,
10 /*inner_iterations*/, 77 /*outer_iterations*/, 10 /*solver_iterations*/);
// The flow (u, v) is computed once, outside TEST_CYCLE(), as input for the needle map.
d_flow(d_frame0, d_frame1, u, v);
cv::cuda::GpuMat vertex, colors;
// Only the needle-map call is wrapped in TEST_CYCLE().
TEST_CYCLE() cv::cuda::createOpticalFlowNeedleMap(u, v, vertex, colors);
CUDA_SANITY_CHECK(vertex, 1e-6);
CUDA_SANITY_CHECK(colors);
}
else
{
FAIL_NO_CPU();
}
}
//////////////////////////////////////////////////////
// BroxOpticalFlow
@ -152,13 +71,19 @@ PERF_TEST_P(ImagePair, BroxOpticalFlow,
{
const cv::cuda::GpuMat d_frame0(frame0);
const cv::cuda::GpuMat d_frame1(frame1);
cv::cuda::GpuMat u;
cv::cuda::GpuMat v;
cv::cuda::GpuMat flow;
cv::cuda::BroxOpticalFlow d_flow(0.197f /*alpha*/, 50.0f /*gamma*/, 0.8f /*scale_factor*/,
10 /*inner_iterations*/, 77 /*outer_iterations*/, 10 /*solver_iterations*/);
cv::Ptr<cv::cuda::BroxOpticalFlow> d_alg =
cv::cuda::BroxOpticalFlow::create(0.197 /*alpha*/, 50.0 /*gamma*/, 0.8 /*scale_factor*/,
10 /*inner_iterations*/, 77 /*outer_iterations*/, 10 /*solver_iterations*/);
TEST_CYCLE() d_flow(d_frame0, d_frame1, u, v);
TEST_CYCLE() d_alg->calc(d_frame0, d_frame1, flow);
cv::cuda::GpuMat flows[2];
cv::cuda::split(flow, flows);
cv::cuda::GpuMat u = flows[0];
cv::cuda::GpuMat v = flows[1];
CUDA_SANITY_CHECK(u, 1e-1);
CUDA_SANITY_CHECK(v, 1e-1);
@ -210,17 +135,17 @@ PERF_TEST_P(ImagePair_Gray_NPts_WinSz_Levels_Iters, PyrLKOpticalFlowSparse,
{
const cv::cuda::GpuMat d_pts(pts.reshape(2, 1));
cv::cuda::PyrLKOpticalFlow d_pyrLK;
d_pyrLK.winSize = cv::Size(winSize, winSize);
d_pyrLK.maxLevel = levels - 1;
d_pyrLK.iters = iters;
cv::Ptr<cv::cuda::SparsePyrLKOpticalFlow> d_pyrLK =
cv::cuda::SparsePyrLKOpticalFlow::create(cv::Size(winSize, winSize),
levels - 1,
iters);
const cv::cuda::GpuMat d_frame0(frame0);
const cv::cuda::GpuMat d_frame1(frame1);
cv::cuda::GpuMat nextPts;
cv::cuda::GpuMat status;
TEST_CYCLE() d_pyrLK.sparse(d_frame0, d_frame1, d_pts, nextPts, status);
TEST_CYCLE() d_pyrLK->calc(d_frame0, d_frame1, d_pts, nextPts, status);
CUDA_SANITY_CHECK(nextPts);
CUDA_SANITY_CHECK(status);
@ -270,15 +195,20 @@ PERF_TEST_P(ImagePair_WinSz_Levels_Iters, PyrLKOpticalFlowDense,
{
const cv::cuda::GpuMat d_frame0(frame0);
const cv::cuda::GpuMat d_frame1(frame1);
cv::cuda::GpuMat u;
cv::cuda::GpuMat v;
cv::cuda::GpuMat flow;
cv::cuda::PyrLKOpticalFlow d_pyrLK;
d_pyrLK.winSize = cv::Size(winSize, winSize);
d_pyrLK.maxLevel = levels - 1;
d_pyrLK.iters = iters;
cv::Ptr<cv::cuda::DensePyrLKOpticalFlow> d_pyrLK =
cv::cuda::DensePyrLKOpticalFlow::create(cv::Size(winSize, winSize),
levels - 1,
iters);
TEST_CYCLE() d_pyrLK.dense(d_frame0, d_frame1, u, v);
TEST_CYCLE() d_pyrLK->calc(d_frame0, d_frame1, flow);
cv::cuda::GpuMat flows[2];
cv::cuda::split(flow, flows);
cv::cuda::GpuMat u = flows[0];
cv::cuda::GpuMat v = flows[1];
CUDA_SANITY_CHECK(u);
CUDA_SANITY_CHECK(v);
@ -315,19 +245,19 @@ PERF_TEST_P(ImagePair, FarnebackOpticalFlow,
{
const cv::cuda::GpuMat d_frame0(frame0);
const cv::cuda::GpuMat d_frame1(frame1);
cv::cuda::GpuMat u;
cv::cuda::GpuMat v;
cv::cuda::GpuMat flow;
cv::cuda::FarnebackOpticalFlow d_farneback;
d_farneback.numLevels = numLevels;
d_farneback.pyrScale = pyrScale;
d_farneback.winSize = winSize;
d_farneback.numIters = numIters;
d_farneback.polyN = polyN;
d_farneback.polySigma = polySigma;
d_farneback.flags = flags;
cv::Ptr<cv::cuda::FarnebackOpticalFlow> d_farneback =
cv::cuda::FarnebackOpticalFlow::create(numLevels, pyrScale, false, winSize,
numIters, polyN, polySigma, flags);
TEST_CYCLE() d_farneback(d_frame0, d_frame1, u, v);
TEST_CYCLE() d_farneback->calc(d_frame0, d_frame1, flow);
cv::cuda::GpuMat flows[2];
cv::cuda::split(flow, flows);
cv::cuda::GpuMat u = flows[0];
cv::cuda::GpuMat v = flows[1];
CUDA_SANITY_CHECK(u, 1e-4);
CUDA_SANITY_CHECK(v, 1e-4);
@ -360,12 +290,18 @@ PERF_TEST_P(ImagePair, OpticalFlowDual_TVL1,
{
const cv::cuda::GpuMat d_frame0(frame0);
const cv::cuda::GpuMat d_frame1(frame1);
cv::cuda::GpuMat u;
cv::cuda::GpuMat v;
cv::cuda::GpuMat flow;
cv::cuda::OpticalFlowDual_TVL1_CUDA d_alg;
cv::Ptr<cv::cuda::OpticalFlowDual_TVL1> d_alg =
cv::cuda::OpticalFlowDual_TVL1::create();
TEST_CYCLE() d_alg(d_frame0, d_frame1, u, v);
TEST_CYCLE() d_alg->calc(d_frame0, d_frame1, flow);
cv::cuda::GpuMat flows[2];
cv::cuda::split(flow, flows);
cv::cuda::GpuMat u = flows[0];
cv::cuda::GpuMat v = flows[1];
CUDA_SANITY_CHECK(u, 1e-1);
CUDA_SANITY_CHECK(v, 1e-1);
@ -383,72 +319,3 @@ PERF_TEST_P(ImagePair, OpticalFlowDual_TVL1,
CPU_SANITY_CHECK(flow);
}
}
//////////////////////////////////////////////////////
// OpticalFlowBM
// Perf test for cv::cuda::calcOpticalFlowBM on a single grayscale image pair.
// Only a CUDA branch exists; the non-CUDA branch calls FAIL_NO_CPU().
PERF_TEST_P(ImagePair, OpticalFlowBM,
Values<pair_string>(make_pair("gpu/opticalflow/frame0.png", "gpu/opticalflow/frame1.png")))
{
declare.time(400);
const cv::Mat frame0 = readImage(GetParam().first, cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(frame0.empty());
const cv::Mat frame1 = readImage(GetParam().second, cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(frame1.empty());
// Block-matching parameters handed to calcOpticalFlowBM below.
const cv::Size block_size(16, 16);
const cv::Size shift_size(1, 1);
const cv::Size max_range(16, 16);
if (PERF_RUN_CUDA())
{
const cv::cuda::GpuMat d_frame0(frame0);
const cv::cuda::GpuMat d_frame1(frame1);
cv::cuda::GpuMat u, v, buf;
// use_previous is passed as false.
TEST_CYCLE() cv::cuda::calcOpticalFlowBM(d_frame0, d_frame1, block_size, shift_size, max_range, false, u, v, buf);
CUDA_SANITY_CHECK(u);
CUDA_SANITY_CHECK(v);
}
else
{
FAIL_NO_CPU();
}
}
// Perf test for cv::cuda::FastOpticalFlowBM on a single grayscale image pair.
// The test name carries the DISABLED_ prefix. Only a CUDA branch exists; the
// non-CUDA branch calls FAIL_NO_CPU().
PERF_TEST_P(ImagePair, DISABLED_FastOpticalFlowBM,
Values<pair_string>(make_pair("gpu/opticalflow/frame0.png", "gpu/opticalflow/frame1.png")))
{
declare.time(400);
const cv::Mat frame0 = readImage(GetParam().first, cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(frame0.empty());
const cv::Mat frame1 = readImage(GetParam().second, cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(frame1.empty());
// Only the widths of max_range and block_size are used below; shift_size is not used.
const cv::Size block_size(16, 16);
const cv::Size shift_size(1, 1);
const cv::Size max_range(16, 16);
if (PERF_RUN_CUDA())
{
const cv::cuda::GpuMat d_frame0(frame0);
const cv::cuda::GpuMat d_frame1(frame1);
cv::cuda::GpuMat u, v;
cv::cuda::FastOpticalFlowBM fastBM;
// max_range.width is passed as search_window and block_size.width as block_window.
TEST_CYCLE() fastBM(d_frame0, d_frame1, u, v, max_range.width, block_size.width);
CUDA_SANITY_CHECK(u, 2);
CUDA_SANITY_CHECK(v, 2);
}
else
{
FAIL_NO_CPU();
}
}

View File

@ -55,6 +55,7 @@
#include "opencv2/ts/cuda_perf.hpp"
#include "opencv2/cudaoptflow.hpp"
#include "opencv2/cudaarithm.hpp"
#include "opencv2/video.hpp"
#ifdef GTEST_CREATE_SHARED_LIBRARY

View File

@ -47,84 +47,148 @@ using namespace cv::cuda;
#if !defined (HAVE_CUDA) || !defined (HAVE_OPENCV_CUDALEGACY) || defined (CUDA_DISABLER)
void cv::cuda::BroxOpticalFlow::operator ()(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
Ptr<BroxOpticalFlow> cv::cuda::BroxOpticalFlow::create(double, double, double, int, int, int) { throw_no_cuda(); return Ptr<BroxOpticalFlow>(); }
#else
namespace
{
size_t getBufSize(const NCVBroxOpticalFlowDescriptor& desc, const NCVMatrix<Ncv32f>& frame0, const NCVMatrix<Ncv32f>& frame1,
NCVMatrix<Ncv32f>& u, NCVMatrix<Ncv32f>& v, const cudaDeviceProp& devProp)
namespace {
class BroxOpticalFlowImpl : public BroxOpticalFlow
{
NCVMemStackAllocator gpuCounter(static_cast<Ncv32u>(devProp.textureAlignment));
public:
BroxOpticalFlowImpl(double alpha, double gamma, double scale_factor,
int inner_iterations, int outer_iterations, int solver_iterations) :
alpha_(alpha), gamma_(gamma), scale_factor_(scale_factor),
inner_iterations_(inner_iterations), outer_iterations_(outer_iterations),
solver_iterations_(solver_iterations)
{
}
virtual void calc(InputArray I0, InputArray I1, InputOutputArray flow, Stream& stream);
virtual double getFlowSmoothness() const { return alpha_; }
virtual void setFlowSmoothness(double alpha) { alpha_ = static_cast<float>(alpha); }
virtual double getGradientConstancyImportance() const { return gamma_; }
virtual void setGradientConstancyImportance(double gamma) { gamma_ = static_cast<float>(gamma); }
virtual double getPyramidScaleFactor() const { return scale_factor_; }
virtual void setPyramidScaleFactor(double scale_factor) { scale_factor_ = static_cast<float>(scale_factor); }
//! number of lagged non-linearity iterations (inner loop)
virtual int getInnerIterations() const { return inner_iterations_; }
virtual void setInnerIterations(int inner_iterations) { inner_iterations_ = inner_iterations; }
//! number of warping iterations (number of pyramid levels)
virtual int getOuterIterations() const { return outer_iterations_; }
virtual void setOuterIterations(int outer_iterations) { outer_iterations_ = outer_iterations; }
//! number of linear system solver iterations
virtual int getSolverIterations() const { return solver_iterations_; }
virtual void setSolverIterations(int solver_iterations) { solver_iterations_ = solver_iterations; }
private:
//! flow smoothness
float alpha_;
//! gradient constancy importance
float gamma_;
//! pyramid scale factor
float scale_factor_;
//! number of lagged non-linearity iterations (inner loop)
int inner_iterations_;
//! number of warping iterations (number of pyramid levels)
int outer_iterations_;
//! number of linear system solver iterations
int solver_iterations_;
};
static size_t getBufSize(const NCVBroxOpticalFlowDescriptor& desc,
const NCVMatrix<Ncv32f>& frame0, const NCVMatrix<Ncv32f>& frame1,
NCVMatrix<Ncv32f>& u, NCVMatrix<Ncv32f>& v,
size_t textureAlignment)
{
NCVMemStackAllocator gpuCounter(static_cast<Ncv32u>(textureAlignment));
ncvSafeCall( NCVBroxOpticalFlow(desc, gpuCounter, frame0, frame1, u, v, 0) );
return gpuCounter.maxSize();
}
static void outputHandler(const String &msg)
{
CV_Error(cv::Error::GpuApiCallError, msg.c_str());
}
void BroxOpticalFlowImpl::calc(InputArray _I0, InputArray _I1, InputOutputArray _flow, Stream& stream)
{
const GpuMat frame0 = _I0.getGpuMat();
const GpuMat frame1 = _I1.getGpuMat();
CV_Assert( frame0.type() == CV_32FC1 );
CV_Assert( frame1.size() == frame0.size() && frame1.type() == frame0.type() );
ncvSetDebugOutputHandler(outputHandler);
BufferPool pool(stream);
GpuMat u = pool.getBuffer(frame0.size(), CV_32FC1);
GpuMat v = pool.getBuffer(frame0.size(), CV_32FC1);
NCVBroxOpticalFlowDescriptor desc;
desc.alpha = alpha_;
desc.gamma = gamma_;
desc.scale_factor = scale_factor_;
desc.number_of_inner_iterations = inner_iterations_;
desc.number_of_outer_iterations = outer_iterations_;
desc.number_of_solver_iterations = solver_iterations_;
NCVMemSegment frame0MemSeg;
frame0MemSeg.begin.memtype = NCVMemoryTypeDevice;
frame0MemSeg.begin.ptr = const_cast<uchar*>(frame0.data);
frame0MemSeg.size = frame0.step * frame0.rows;
NCVMemSegment frame1MemSeg;
frame1MemSeg.begin.memtype = NCVMemoryTypeDevice;
frame1MemSeg.begin.ptr = const_cast<uchar*>(frame1.data);
frame1MemSeg.size = frame1.step * frame1.rows;
NCVMemSegment uMemSeg;
uMemSeg.begin.memtype = NCVMemoryTypeDevice;
uMemSeg.begin.ptr = u.ptr();
uMemSeg.size = u.step * u.rows;
NCVMemSegment vMemSeg;
vMemSeg.begin.memtype = NCVMemoryTypeDevice;
vMemSeg.begin.ptr = v.ptr();
vMemSeg.size = v.step * v.rows;
DeviceInfo devInfo;
size_t textureAlignment = devInfo.textureAlignment();
NCVMatrixReuse<Ncv32f> frame0Mat(frame0MemSeg, static_cast<Ncv32u>(textureAlignment), frame0.cols, frame0.rows, static_cast<Ncv32u>(frame0.step));
NCVMatrixReuse<Ncv32f> frame1Mat(frame1MemSeg, static_cast<Ncv32u>(textureAlignment), frame1.cols, frame1.rows, static_cast<Ncv32u>(frame1.step));
NCVMatrixReuse<Ncv32f> uMat(uMemSeg, static_cast<Ncv32u>(textureAlignment), u.cols, u.rows, static_cast<Ncv32u>(u.step));
NCVMatrixReuse<Ncv32f> vMat(vMemSeg, static_cast<Ncv32u>(textureAlignment), v.cols, v.rows, static_cast<Ncv32u>(v.step));
size_t bufSize = getBufSize(desc, frame0Mat, frame1Mat, uMat, vMat, textureAlignment);
GpuMat buf = pool.getBuffer(1, static_cast<int>(bufSize), CV_8UC1);
NCVMemStackAllocator gpuAllocator(NCVMemoryTypeDevice, bufSize, static_cast<Ncv32u>(textureAlignment), buf.ptr());
ncvSafeCall( NCVBroxOpticalFlow(desc, gpuAllocator, frame0Mat, frame1Mat, uMat, vMat, StreamAccessor::getStream(stream)) );
GpuMat flows[] = {u, v};
cuda::merge(flows, 2, _flow, stream);
}
}
namespace
Ptr<BroxOpticalFlow> cv::cuda::BroxOpticalFlow::create(double alpha, double gamma, double scale_factor, int inner_iterations, int outer_iterations, int solver_iterations)
{
static void outputHandler(const String &msg) { CV_Error(cv::Error::GpuApiCallError, msg.c_str()); }
}
void cv::cuda::BroxOpticalFlow::operator ()(const GpuMat& frame0, const GpuMat& frame1, GpuMat& u, GpuMat& v, Stream& s)
{
ncvSetDebugOutputHandler(outputHandler);
CV_Assert(frame0.type() == CV_32FC1);
CV_Assert(frame1.size() == frame0.size() && frame1.type() == frame0.type());
u.create(frame0.size(), CV_32FC1);
v.create(frame0.size(), CV_32FC1);
cudaDeviceProp devProp;
cudaSafeCall( cudaGetDeviceProperties(&devProp, getDevice()) );
NCVBroxOpticalFlowDescriptor desc;
desc.alpha = alpha;
desc.gamma = gamma;
desc.scale_factor = scale_factor;
desc.number_of_inner_iterations = inner_iterations;
desc.number_of_outer_iterations = outer_iterations;
desc.number_of_solver_iterations = solver_iterations;
NCVMemSegment frame0MemSeg;
frame0MemSeg.begin.memtype = NCVMemoryTypeDevice;
frame0MemSeg.begin.ptr = const_cast<uchar*>(frame0.data);
frame0MemSeg.size = frame0.step * frame0.rows;
NCVMemSegment frame1MemSeg;
frame1MemSeg.begin.memtype = NCVMemoryTypeDevice;
frame1MemSeg.begin.ptr = const_cast<uchar*>(frame1.data);
frame1MemSeg.size = frame1.step * frame1.rows;
NCVMemSegment uMemSeg;
uMemSeg.begin.memtype = NCVMemoryTypeDevice;
uMemSeg.begin.ptr = u.ptr();
uMemSeg.size = u.step * u.rows;
NCVMemSegment vMemSeg;
vMemSeg.begin.memtype = NCVMemoryTypeDevice;
vMemSeg.begin.ptr = v.ptr();
vMemSeg.size = v.step * v.rows;
NCVMatrixReuse<Ncv32f> frame0Mat(frame0MemSeg, static_cast<Ncv32u>(devProp.textureAlignment), frame0.cols, frame0.rows, static_cast<Ncv32u>(frame0.step));
NCVMatrixReuse<Ncv32f> frame1Mat(frame1MemSeg, static_cast<Ncv32u>(devProp.textureAlignment), frame1.cols, frame1.rows, static_cast<Ncv32u>(frame1.step));
NCVMatrixReuse<Ncv32f> uMat(uMemSeg, static_cast<Ncv32u>(devProp.textureAlignment), u.cols, u.rows, static_cast<Ncv32u>(u.step));
NCVMatrixReuse<Ncv32f> vMat(vMemSeg, static_cast<Ncv32u>(devProp.textureAlignment), v.cols, v.rows, static_cast<Ncv32u>(v.step));
cudaStream_t stream = StreamAccessor::getStream(s);
size_t bufSize = getBufSize(desc, frame0Mat, frame1Mat, uMat, vMat, devProp);
ensureSizeIsEnough(1, static_cast<int>(bufSize), CV_8UC1, buf);
NCVMemStackAllocator gpuAllocator(NCVMemoryTypeDevice, bufSize, static_cast<Ncv32u>(devProp.textureAlignment), buf.ptr());
ncvSafeCall( NCVBroxOpticalFlow(desc, gpuAllocator, frame0Mat, frame1Mat, uMat, vMat, stream) );
return makePtr<BroxOpticalFlowImpl>(alpha, gamma, scale_factor, inner_iterations, outer_iterations, solver_iterations);
}
#endif /* HAVE_CUDA */

View File

@ -472,16 +472,16 @@ namespace pyrlk
}
}
void loadConstants(int2 winSize, int iters)
void loadConstants(int2 winSize, int iters, cudaStream_t stream)
{
cudaSafeCall( cudaMemcpyToSymbol(c_winSize_x, &winSize.x, sizeof(int)) );
cudaSafeCall( cudaMemcpyToSymbol(c_winSize_y, &winSize.y, sizeof(int)) );
cudaSafeCall( cudaMemcpyToSymbolAsync(c_winSize_x, &winSize.x, sizeof(int), 0, cudaMemcpyHostToDevice, stream) );
cudaSafeCall( cudaMemcpyToSymbolAsync(c_winSize_y, &winSize.y, sizeof(int), 0, cudaMemcpyHostToDevice, stream) );
int2 halfWin = make_int2((winSize.x - 1) / 2, (winSize.y - 1) / 2);
cudaSafeCall( cudaMemcpyToSymbol(c_halfWin_x, &halfWin.x, sizeof(int)) );
cudaSafeCall( cudaMemcpyToSymbol(c_halfWin_y, &halfWin.y, sizeof(int)) );
cudaSafeCall( cudaMemcpyToSymbolAsync(c_halfWin_x, &halfWin.x, sizeof(int), 0, cudaMemcpyHostToDevice, stream) );
cudaSafeCall( cudaMemcpyToSymbolAsync(c_halfWin_y, &halfWin.y, sizeof(int), 0, cudaMemcpyHostToDevice, stream) );
cudaSafeCall( cudaMemcpyToSymbol(c_iters, &iters, sizeof(int)) );
cudaSafeCall( cudaMemcpyToSymbolAsync(c_iters, &iters, sizeof(int), 0, cudaMemcpyHostToDevice, stream) );
}
void sparse1(PtrStepSzf I, PtrStepSzf J, const float2* prevPts, float2* nextPts, uchar* status, float* err, int ptcount,

View File

@ -66,15 +66,16 @@ namespace tvl1flow
dy(y, x) = 0.5f * (src(::min(y + 1, src.rows - 1), x) - src(::max(y - 1, 0), x));
}
void centeredGradient(PtrStepSzf src, PtrStepSzf dx, PtrStepSzf dy)
void centeredGradient(PtrStepSzf src, PtrStepSzf dx, PtrStepSzf dy, cudaStream_t stream)
{
const dim3 block(32, 8);
const dim3 grid(divUp(src.cols, block.x), divUp(src.rows, block.y));
centeredGradientKernel<<<grid, block>>>(src, dx, dy);
centeredGradientKernel<<<grid, block, 0, stream>>>(src, dx, dy);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
if (!stream)
cudaSafeCall( cudaDeviceSynchronize() );
}
}
@ -164,7 +165,10 @@ namespace tvl1flow
rho(y, x) = I1wVal - I1wxVal * u1Val - I1wyVal * u2Val - I0Val;
}
void warpBackward(PtrStepSzf I0, PtrStepSzf I1, PtrStepSzf I1x, PtrStepSzf I1y, PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf I1w, PtrStepSzf I1wx, PtrStepSzf I1wy, PtrStepSzf grad, PtrStepSzf rho)
void warpBackward(PtrStepSzf I0, PtrStepSzf I1, PtrStepSzf I1x, PtrStepSzf I1y,
PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf I1w, PtrStepSzf I1wx,
PtrStepSzf I1wy, PtrStepSzf grad, PtrStepSzf rho,
cudaStream_t stream)
{
const dim3 block(32, 8);
const dim3 grid(divUp(I0.cols, block.x), divUp(I0.rows, block.y));
@ -173,10 +177,11 @@ namespace tvl1flow
bindTexture(&tex_I1x, I1x);
bindTexture(&tex_I1y, I1y);
warpBackwardKernel<<<grid, block>>>(I0, u1, u2, I1w, I1wx, I1wy, grad, rho);
warpBackwardKernel<<<grid, block, 0, stream>>>(I0, u1, u2, I1w, I1wx, I1wy, grad, rho);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
if (!stream)
cudaSafeCall( cudaDeviceSynchronize() );
}
}
@ -292,15 +297,17 @@ namespace tvl1flow
PtrStepSzf grad, PtrStepSzf rho_c,
PtrStepSzf p11, PtrStepSzf p12, PtrStepSzf p21, PtrStepSzf p22, PtrStepSzf p31, PtrStepSzf p32,
PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf u3, PtrStepSzf error,
float l_t, float theta, float gamma, bool calcError)
float l_t, float theta, float gamma, bool calcError,
cudaStream_t stream)
{
const dim3 block(32, 8);
const dim3 grid(divUp(I1wx.cols, block.x), divUp(I1wx.rows, block.y));
estimateUKernel<<<grid, block>>>(I1wx, I1wy, grad, rho_c, p11, p12, p21, p22, p31, p32, u1, u2, u3, error, l_t, theta, gamma, calcError);
estimateUKernel<<<grid, block, 0, stream>>>(I1wx, I1wy, grad, rho_c, p11, p12, p21, p22, p31, p32, u1, u2, u3, error, l_t, theta, gamma, calcError);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
if (!stream)
cudaSafeCall( cudaDeviceSynchronize() );
}
}
@ -346,15 +353,19 @@ namespace tvl1flow
}
}
void estimateDualVariables(PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf u3, PtrStepSzf p11, PtrStepSzf p12, PtrStepSzf p21, PtrStepSzf p22, PtrStepSzf p31, PtrStepSzf p32, float taut, float gamma)
void estimateDualVariables(PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf u3,
PtrStepSzf p11, PtrStepSzf p12, PtrStepSzf p21, PtrStepSzf p22, PtrStepSzf p31, PtrStepSzf p32,
float taut, float gamma,
cudaStream_t stream)
{
const dim3 block(32, 8);
const dim3 grid(divUp(u1.cols, block.x), divUp(u1.rows, block.y));
estimateDualVariablesKernel<<<grid, block>>>(u1, u2, u3, p11, p12, p21, p22, p31, p32, taut, gamma);
estimateDualVariablesKernel<<<grid, block, 0, stream>>>(u1, u2, u3, p11, p12, p21, p22, p31, p32, taut, gamma);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
if (!stream)
cudaSafeCall( cudaDeviceSynchronize() );
}
}

View File

@ -42,23 +42,21 @@
#include "precomp.hpp"
#define MIN_SIZE 32
#define S(x) StreamAccessor::getStream(x)
// CUDA resize() is fast, but it differs from the CPU analog. Disabling this flag
// leads to an inefficient code. It's for debug purposes only.
#define ENABLE_CUDA_RESIZE 1
using namespace cv;
using namespace cv::cuda;
#if !defined HAVE_CUDA || defined(CUDA_DISABLER)
void cv::cuda::FarnebackOpticalFlow::operator ()(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
// Stub for builds without CUDA: always throws via throw_no_cuda(). The trailing return only
// satisfies the compiler and must be an (empty) Ptr of the declared return type.
Ptr<FarnebackOpticalFlow> cv::cuda::FarnebackOpticalFlow::create(int, double, bool, int, int, int, double, int) { throw_no_cuda(); return Ptr<FarnebackOpticalFlow>(); }
#else
#define MIN_SIZE 32
// CUDA resize() is fast, but it differs from the CPU analog. Disabling this flag
// leads to inefficient code. It's for debug purposes only.
#define ENABLE_CUDA_RESIZE 1
namespace cv { namespace cuda { namespace device { namespace optflow_farneback
{
void setPolynomialExpansionConsts(
@ -76,8 +74,6 @@ namespace cv { namespace cuda { namespace device { namespace optflow_farneback
void updateFlowGpu(
const PtrStepSzf M, PtrStepSzf flowx, PtrStepSzf flowy, cudaStream_t stream);
/*void boxFilterGpu(const PtrStepSzf src, int ksizeHalf, PtrStepSzf dst, cudaStream_t stream);*/
void boxFilter5Gpu(const PtrStepSzf src, int ksizeHalf, PtrStepSzf dst, cudaStream_t stream);
void boxFilter5Gpu_CC11(const PtrStepSzf src, int ksizeHalf, PtrStepSzf dst, cudaStream_t stream);
@ -93,10 +89,93 @@ namespace cv { namespace cuda { namespace device { namespace optflow_farneback
void gaussianBlur5Gpu_CC11(
const PtrStepSzf src, int ksizeHalf, PtrStepSzf dst, int borderType, cudaStream_t stream);
}}}} // namespace cv { namespace cuda { namespace cudev { namespace optflow_farneback
}}}}
namespace
{
//! Concrete implementation behind FarnebackOpticalFlow::create().
//! Holds the algorithm parameters plus device buffers that are kept between calls.
class FarnebackOpticalFlowImpl : public FarnebackOpticalFlow
{
public:
//! Stores every argument in the member of the same name; no validation happens here
//! (polyN and the fastPyramids/pyrScale combination are asserted in calcImpl).
FarnebackOpticalFlowImpl(int numLevels, double pyrScale, bool fastPyramids, int winSize,
int numIters, int polyN, double polySigma, int flags) :
numLevels_(numLevels), pyrScale_(pyrScale), fastPyramids_(fastPyramids), winSize_(winSize),
numIters_(numIters), polyN_(polyN), polySigma_(polySigma), flags_(flags)
{
}
// Plain accessors: each pair reads/writes the corresponding member without checks.
virtual int getNumLevels() const { return numLevels_; }
virtual void setNumLevels(int numLevels) { numLevels_ = numLevels; }
virtual double getPyrScale() const { return pyrScale_; }
virtual void setPyrScale(double pyrScale) { pyrScale_ = pyrScale; }
virtual bool getFastPyramids() const { return fastPyramids_; }
virtual void setFastPyramids(bool fastPyramids) { fastPyramids_ = fastPyramids; }
virtual int getWinSize() const { return winSize_; }
virtual void setWinSize(int winSize) { winSize_ = winSize; }
virtual int getNumIters() const { return numIters_; }
virtual void setNumIters(int numIters) { numIters_ = numIters; }
virtual int getPolyN() const { return polyN_; }
virtual void setPolyN(int polyN) { polyN_ = polyN; }
virtual double getPolySigma() const { return polySigma_; }
virtual void setPolySigma(double polySigma) { polySigma_ = polySigma; }
virtual int getFlags() const { return flags_; }
virtual void setFlags(int flags) { flags_ = flags; }
//! Computes the flow between I0 and I1 and writes it to flow; defined out of line.
virtual void calc(InputArray I0, InputArray I1, InputOutputArray flow, Stream& stream);
private:
//! upper bound on the number of pyramid levels (calcImpl crops levels that would fall below MIN_SIZE)
int numLevels_;
//! per-level scale multiplier
double pyrScale_;
//! build the pyramids with pyrDown(); calcImpl asserts pyrScale_ is 0.5 when this is set
bool fastPyramids_;
//! passed as blockSize to the updateFlow_* helpers
int winSize_;
//! number of flow-update iterations per pyramid level
int numIters_;
//! polynomial expansion size; calcImpl asserts it is 5 or 7
int polyN_;
//! sigma forwarded to setPolynomialExpansionConsts
double polySigma_;
//! bit flags; calcImpl tests OPTFLOW_USE_INITIAL_FLOW and OPTFLOW_FARNEBACK_GAUSSIAN
int flags_;
private:
//! Fills the g / xg / xxg tables and the four inverse-matrix entries used by the polynomial expansion.
void prepareGaussian(
int n, double sigma, float *g, float *xg, float *xxg,
double &ig11, double &ig03, double &ig33, double &ig55);
//! Prepares the tables on the host and hands them to the device-side setPolynomialExpansionConsts.
void setPolynomialExpansionConsts(int n, double sigma);
//! One flow update using the box-filter device kernels.
void updateFlow_boxFilter(
const GpuMat& R0, const GpuMat& R1, GpuMat& flowx, GpuMat &flowy,
GpuMat& M, GpuMat &bufM, int blockSize, bool updateMatrices, Stream streams[]);
//! One flow update using the Gaussian-blur device kernels.
void updateFlow_gaussianBlur(
const GpuMat& R0, const GpuMat& R1, GpuMat& flowx, GpuMat& flowy,
GpuMat& M, GpuMat &bufM, int blockSize, bool updateMatrices, Stream streams[]);
//! Coarse-to-fine driver working on separate x/y flow planes.
void calcImpl(const GpuMat &frame0, const GpuMat &frame1, GpuMat &flowx, GpuMat &flowy, Stream &stream);
//! CV_32F conversions of the two input frames
GpuMat frames_[2];
//! backing storage handed to allocMatFromBuf in calcImpl
GpuMat pyrLevel_[2], M_, bufM_, R_[2], blurredFrame_[2];
//! pyrDown() pyramids, filled only when fastPyramids_ is set
std::vector<GpuMat> pyramid0_, pyramid1_;
};
//! Computes dense Farneback optical flow between two single-channel frames.
//!
//! @param _frame0 first frame
//! @param _frame1 second frame (same size as the first; asserted in calcImpl)
//! @param _flow   on return, the two flow planes merged into one array. When
//!                OPTFLOW_USE_INITIAL_FLOW is set it is also read on entry and
//!                must then be a CV_32FC2 array of the frame size.
//! @param stream  stream used for the pool buffers, the solver and the merge
void FarnebackOpticalFlowImpl::calc(InputArray _frame0, InputArray _frame1, InputOutputArray _flow, Stream& stream)
{
    const GpuMat frame0 = _frame0.getGpuMat();
    const GpuMat frame1 = _frame1.getGpuMat();

    BufferPool pool(stream);
    GpuMat flowx = pool.getBuffer(frame0.size(), CV_32FC1);
    GpuMat flowy = pool.getBuffer(frame0.size(), CV_32FC1);

    // calcImpl seeds the coarsest level from flowx/flowy when an initial flow is
    // requested. The pool buffers are uninitialised, so the caller's flow has to
    // be split into them first; otherwise the solver starts from garbage.
    if (flags_ & OPTFLOW_USE_INITIAL_FLOW)
    {
        const GpuMat initialFlow = _flow.getGpuMat();
        CV_Assert( initialFlow.size() == frame0.size() && initialFlow.type() == CV_32FC2 );

        // The headers are shallow copies, so split() writes into the pool buffers.
        GpuMat initialPlanes[] = {flowx, flowy};
        cuda::split(initialFlow, initialPlanes, stream);
    }

    calcImpl(frame0, frame1, flowx, flowy, stream);

    GpuMat flows[] = {flowx, flowy};
    cuda::merge(flows, 2, _flow, stream);
}
GpuMat allocMatFromBuf(int rows, int cols, int type, GpuMat& mat)
{
if (!mat.empty() && mat.type() == type && mat.rows >= rows && mat.cols >= cols)
@ -104,285 +183,287 @@ namespace
return mat = GpuMat(rows, cols, type);
}
}
void cv::cuda::FarnebackOpticalFlow::prepareGaussian(
int n, double sigma, float *g, float *xg, float *xxg,
double &ig11, double &ig03, double &ig33, double &ig55)
{
double s = 0.;
for (int x = -n; x <= n; x++)
{
g[x] = (float)std::exp(-x*x/(2*sigma*sigma));
s += g[x];
}
s = 1./s;
for (int x = -n; x <= n; x++)
{
g[x] = (float)(g[x]*s);
xg[x] = (float)(x*g[x]);
xxg[x] = (float)(x*x*g[x]);
}
Mat_<double> G(6, 6);
G.setTo(0);
for (int y = -n; y <= n; y++)
void FarnebackOpticalFlowImpl::prepareGaussian(
int n, double sigma, float *g, float *xg, float *xxg,
double &ig11, double &ig03, double &ig33, double &ig55)
{
double s = 0.;
for (int x = -n; x <= n; x++)
{
G(0,0) += g[y]*g[x];
G(1,1) += g[y]*g[x]*x*x;
G(3,3) += g[y]*g[x]*x*x*x*x;
G(5,5) += g[y]*g[x]*x*x*y*y;
}
}
//G[0][0] = 1.;
G(2,2) = G(0,3) = G(0,4) = G(3,0) = G(4,0) = G(1,1);
G(4,4) = G(3,3);
G(3,4) = G(4,3) = G(5,5);
// invG:
// [ x e e ]
// [ y ]
// [ y ]
// [ e z ]
// [ e z ]
// [ u ]
Mat_<double> invG = G.inv(DECOMP_CHOLESKY);
ig11 = invG(1,1);
ig03 = invG(0,3);
ig33 = invG(3,3);
ig55 = invG(5,5);
}
void cv::cuda::FarnebackOpticalFlow::setPolynomialExpansionConsts(int n, double sigma)
{
std::vector<float> buf(n*6 + 3);
float* g = &buf[0] + n;
float* xg = g + n*2 + 1;
float* xxg = xg + n*2 + 1;
if (sigma < FLT_EPSILON)
sigma = n*0.3;
double ig11, ig03, ig33, ig55;
prepareGaussian(n, sigma, g, xg, xxg, ig11, ig03, ig33, ig55);
device::optflow_farneback::setPolynomialExpansionConsts(n, g, xg, xxg, static_cast<float>(ig11), static_cast<float>(ig03), static_cast<float>(ig33), static_cast<float>(ig55));
}
void cv::cuda::FarnebackOpticalFlow::updateFlow_boxFilter(
const GpuMat& R0, const GpuMat& R1, GpuMat& flowx, GpuMat &flowy,
GpuMat& M, GpuMat &bufM, int blockSize, bool updateMatrices, Stream streams[])
{
if (deviceSupports(FEATURE_SET_COMPUTE_12))
device::optflow_farneback::boxFilter5Gpu(M, blockSize/2, bufM, S(streams[0]));
else
device::optflow_farneback::boxFilter5Gpu_CC11(M, blockSize/2, bufM, S(streams[0]));
swap(M, bufM);
for (int i = 1; i < 5; ++i)
streams[i].waitForCompletion();
device::optflow_farneback::updateFlowGpu(M, flowx, flowy, S(streams[0]));
if (updateMatrices)
device::optflow_farneback::updateMatricesGpu(flowx, flowy, R0, R1, M, S(streams[0]));
}
void cv::cuda::FarnebackOpticalFlow::updateFlow_gaussianBlur(
const GpuMat& R0, const GpuMat& R1, GpuMat& flowx, GpuMat& flowy,
GpuMat& M, GpuMat &bufM, int blockSize, bool updateMatrices, Stream streams[])
{
if (deviceSupports(FEATURE_SET_COMPUTE_12))
device::optflow_farneback::gaussianBlur5Gpu(
M, blockSize/2, bufM, BORDER_REPLICATE, S(streams[0]));
else
device::optflow_farneback::gaussianBlur5Gpu_CC11(
M, blockSize/2, bufM, BORDER_REPLICATE, S(streams[0]));
swap(M, bufM);
device::optflow_farneback::updateFlowGpu(M, flowx, flowy, S(streams[0]));
if (updateMatrices)
device::optflow_farneback::updateMatricesGpu(flowx, flowy, R0, R1, M, S(streams[0]));
}
void cv::cuda::FarnebackOpticalFlow::operator ()(
const GpuMat &frame0, const GpuMat &frame1, GpuMat &flowx, GpuMat &flowy, Stream &s)
{
CV_Assert(frame0.channels() == 1 && frame1.channels() == 1);
CV_Assert(frame0.size() == frame1.size());
CV_Assert(polyN == 5 || polyN == 7);
CV_Assert(!fastPyramids || std::abs(pyrScale - 0.5) < 1e-6);
Stream streams[5];
if (S(s))
streams[0] = s;
Size size = frame0.size();
GpuMat prevFlowX, prevFlowY, curFlowX, curFlowY;
flowx.create(size, CV_32F);
flowy.create(size, CV_32F);
GpuMat flowx0 = flowx;
GpuMat flowy0 = flowy;
// Crop unnecessary levels
double scale = 1;
int numLevelsCropped = 0;
for (; numLevelsCropped < numLevels; numLevelsCropped++)
{
scale *= pyrScale;
if (size.width*scale < MIN_SIZE || size.height*scale < MIN_SIZE)
break;
}
frame0.convertTo(frames_[0], CV_32F, streams[0]);
frame1.convertTo(frames_[1], CV_32F, streams[1]);
if (fastPyramids)
{
// Build Gaussian pyramids using pyrDown()
pyramid0_.resize(numLevelsCropped + 1);
pyramid1_.resize(numLevelsCropped + 1);
pyramid0_[0] = frames_[0];
pyramid1_[0] = frames_[1];
for (int i = 1; i <= numLevelsCropped; ++i)
{
cuda::pyrDown(pyramid0_[i - 1], pyramid0_[i], streams[0]);
cuda::pyrDown(pyramid1_[i - 1], pyramid1_[i], streams[1]);
}
}
setPolynomialExpansionConsts(polyN, polySigma);
device::optflow_farneback::setUpdateMatricesConsts();
for (int k = numLevelsCropped; k >= 0; k--)
{
streams[0].waitForCompletion();
scale = 1;
for (int i = 0; i < k; i++)
scale *= pyrScale;
double sigma = (1./scale - 1) * 0.5;
int smoothSize = cvRound(sigma*5) | 1;
smoothSize = std::max(smoothSize, 3);
int width = cvRound(size.width*scale);
int height = cvRound(size.height*scale);
if (fastPyramids)
{
width = pyramid0_[k].cols;
height = pyramid0_[k].rows;
g[x] = (float)std::exp(-x*x/(2*sigma*sigma));
s += g[x];
}
if (k > 0)
s = 1./s;
for (int x = -n; x <= n; x++)
{
curFlowX.create(height, width, CV_32F);
curFlowY.create(height, width, CV_32F);
}
else
{
curFlowX = flowx0;
curFlowY = flowy0;
g[x] = (float)(g[x]*s);
xg[x] = (float)(x*g[x]);
xxg[x] = (float)(x*x*g[x]);
}
if (!prevFlowX.data)
Mat_<double> G(6, 6);
G.setTo(0);
for (int y = -n; y <= n; y++)
{
if (flags & OPTFLOW_USE_INITIAL_FLOW)
for (int x = -n; x <= n; x++)
{
cuda::resize(flowx0, curFlowX, Size(width, height), 0, 0, INTER_LINEAR, streams[0]);
cuda::resize(flowy0, curFlowY, Size(width, height), 0, 0, INTER_LINEAR, streams[1]);
curFlowX.convertTo(curFlowX, curFlowX.depth(), scale, streams[0]);
curFlowY.convertTo(curFlowY, curFlowY.depth(), scale, streams[1]);
G(0,0) += g[y]*g[x];
G(1,1) += g[y]*g[x]*x*x;
G(3,3) += g[y]*g[x]*x*x*x*x;
G(5,5) += g[y]*g[x]*x*x*y*y;
}
}
//G[0][0] = 1.;
G(2,2) = G(0,3) = G(0,4) = G(3,0) = G(4,0) = G(1,1);
G(4,4) = G(3,3);
G(3,4) = G(4,3) = G(5,5);
// invG:
// [ x e e ]
// [ y ]
// [ y ]
// [ e z ]
// [ e z ]
// [ u ]
Mat_<double> invG = G.inv(DECOMP_CHOLESKY);
ig11 = invG(1,1);
ig03 = invG(0,3);
ig33 = invG(3,3);
ig55 = invG(5,5);
}
//! Builds the host-side tables for a polynomial expansion of half-width n and
//! forwards them, together with four inverse-matrix entries narrowed to float,
//! to device::optflow_farneback::setPolynomialExpansionConsts.
//!
//! @param n     half-width of the tables (each table has 2n+1 entries)
//! @param sigma Gaussian sigma; values below FLT_EPSILON are replaced by n*0.3
void FarnebackOpticalFlowImpl::setPolynomialExpansionConsts(int n, double sigma)
{
    // One allocation holds three consecutive (2n+1)-entry tables.
    const int tableLen = 2*n + 1;
    std::vector<float> storage(3*tableLen);

    // Each pointer addresses the centre entry of its table, so prepareGaussian
    // can index them with x in [-n, n].
    float* const g = &storage[0] + n;
    float* const xg = g + tableLen;
    float* const xxg = xg + tableLen;

    const double usedSigma = (sigma < FLT_EPSILON) ? n*0.3 : sigma;

    double ig11, ig03, ig33, ig55;
    prepareGaussian(n, usedSigma, g, xg, xxg, ig11, ig03, ig33, ig55);

    device::optflow_farneback::setPolynomialExpansionConsts(
        n, g, xg, xxg,
        static_cast<float>(ig11), static_cast<float>(ig03),
        static_cast<float>(ig33), static_cast<float>(ig55));
}
void FarnebackOpticalFlowImpl::updateFlow_boxFilter(
const GpuMat& R0, const GpuMat& R1, GpuMat& flowx, GpuMat &flowy,
GpuMat& M, GpuMat &bufM, int blockSize, bool updateMatrices, Stream streams[])
{
if (deviceSupports(FEATURE_SET_COMPUTE_12))
device::optflow_farneback::boxFilter5Gpu(M, blockSize/2, bufM, StreamAccessor::getStream(streams[0]));
else
device::optflow_farneback::boxFilter5Gpu_CC11(M, blockSize/2, bufM, StreamAccessor::getStream(streams[0]));
swap(M, bufM);
for (int i = 1; i < 5; ++i)
streams[i].waitForCompletion();
device::optflow_farneback::updateFlowGpu(M, flowx, flowy, StreamAccessor::getStream(streams[0]));
if (updateMatrices)
device::optflow_farneback::updateMatricesGpu(flowx, flowy, R0, R1, M, StreamAccessor::getStream(streams[0]));
}
void FarnebackOpticalFlowImpl::updateFlow_gaussianBlur(
const GpuMat& R0, const GpuMat& R1, GpuMat& flowx, GpuMat& flowy,
GpuMat& M, GpuMat &bufM, int blockSize, bool updateMatrices, Stream streams[])
{
if (deviceSupports(FEATURE_SET_COMPUTE_12))
device::optflow_farneback::gaussianBlur5Gpu(
M, blockSize/2, bufM, BORDER_REPLICATE, StreamAccessor::getStream(streams[0]));
else
device::optflow_farneback::gaussianBlur5Gpu_CC11(
M, blockSize/2, bufM, BORDER_REPLICATE, StreamAccessor::getStream(streams[0]));
swap(M, bufM);
device::optflow_farneback::updateFlowGpu(M, flowx, flowy, StreamAccessor::getStream(streams[0]));
if (updateMatrices)
device::optflow_farneback::updateMatricesGpu(flowx, flowy, R0, R1, M, StreamAccessor::getStream(streams[0]));
}
void FarnebackOpticalFlowImpl::calcImpl(const GpuMat &frame0, const GpuMat &frame1, GpuMat &flowx, GpuMat &flowy, Stream &stream)
{
CV_Assert(frame0.channels() == 1 && frame1.channels() == 1);
CV_Assert(frame0.size() == frame1.size());
CV_Assert(polyN_ == 5 || polyN_ == 7);
CV_Assert(!fastPyramids_ || std::abs(pyrScale_ - 0.5) < 1e-6);
Stream streams[5];
if (stream)
streams[0] = stream;
Size size = frame0.size();
GpuMat prevFlowX, prevFlowY, curFlowX, curFlowY;
flowx.create(size, CV_32F);
flowy.create(size, CV_32F);
GpuMat flowx0 = flowx;
GpuMat flowy0 = flowy;
// Crop unnecessary levels
double scale = 1;
int numLevelsCropped = 0;
for (; numLevelsCropped < numLevels_; numLevelsCropped++)
{
scale *= pyrScale_;
if (size.width*scale < MIN_SIZE || size.height*scale < MIN_SIZE)
break;
}
frame0.convertTo(frames_[0], CV_32F, streams[0]);
frame1.convertTo(frames_[1], CV_32F, streams[1]);
if (fastPyramids_)
{
// Build Gaussian pyramids using pyrDown()
pyramid0_.resize(numLevelsCropped + 1);
pyramid1_.resize(numLevelsCropped + 1);
pyramid0_[0] = frames_[0];
pyramid1_[0] = frames_[1];
for (int i = 1; i <= numLevelsCropped; ++i)
{
cuda::pyrDown(pyramid0_[i - 1], pyramid0_[i], streams[0]);
cuda::pyrDown(pyramid1_[i - 1], pyramid1_[i], streams[1]);
}
}
setPolynomialExpansionConsts(polyN_, polySigma_);
device::optflow_farneback::setUpdateMatricesConsts();
for (int k = numLevelsCropped; k >= 0; k--)
{
streams[0].waitForCompletion();
scale = 1;
for (int i = 0; i < k; i++)
scale *= pyrScale_;
double sigma = (1./scale - 1) * 0.5;
int smoothSize = cvRound(sigma*5) | 1;
smoothSize = std::max(smoothSize, 3);
int width = cvRound(size.width*scale);
int height = cvRound(size.height*scale);
if (fastPyramids_)
{
width = pyramid0_[k].cols;
height = pyramid0_[k].rows;
}
if (k > 0)
{
curFlowX.create(height, width, CV_32F);
curFlowY.create(height, width, CV_32F);
}
else
{
curFlowX.setTo(0, streams[0]);
curFlowY.setTo(0, streams[1]);
curFlowX = flowx0;
curFlowY = flowy0;
}
}
else
{
cuda::resize(prevFlowX, curFlowX, Size(width, height), 0, 0, INTER_LINEAR, streams[0]);
cuda::resize(prevFlowY, curFlowY, Size(width, height), 0, 0, INTER_LINEAR, streams[1]);
curFlowX.convertTo(curFlowX, curFlowX.depth(), 1./pyrScale, streams[0]);
curFlowY.convertTo(curFlowY, curFlowY.depth(), 1./pyrScale, streams[1]);
}
GpuMat M = allocMatFromBuf(5*height, width, CV_32F, M_);
GpuMat bufM = allocMatFromBuf(5*height, width, CV_32F, bufM_);
GpuMat R[2] =
{
allocMatFromBuf(5*height, width, CV_32F, R_[0]),
allocMatFromBuf(5*height, width, CV_32F, R_[1])
};
if (fastPyramids)
{
device::optflow_farneback::polynomialExpansionGpu(pyramid0_[k], polyN, R[0], S(streams[0]));
device::optflow_farneback::polynomialExpansionGpu(pyramid1_[k], polyN, R[1], S(streams[1]));
}
else
{
GpuMat blurredFrame[2] =
if (!prevFlowX.data)
{
allocMatFromBuf(size.height, size.width, CV_32F, blurredFrame_[0]),
allocMatFromBuf(size.height, size.width, CV_32F, blurredFrame_[1])
};
GpuMat pyrLevel[2] =
{
allocMatFromBuf(height, width, CV_32F, pyrLevel_[0]),
allocMatFromBuf(height, width, CV_32F, pyrLevel_[1])
};
Mat g = getGaussianKernel(smoothSize, sigma, CV_32F);
device::optflow_farneback::setGaussianBlurKernel(g.ptr<float>(smoothSize/2), smoothSize/2);
for (int i = 0; i < 2; i++)
{
device::optflow_farneback::gaussianBlurGpu(
frames_[i], smoothSize/2, blurredFrame[i], BORDER_REFLECT101, S(streams[i]));
cuda::resize(blurredFrame[i], pyrLevel[i], Size(width, height), 0.0, 0.0, INTER_LINEAR, streams[i]);
device::optflow_farneback::polynomialExpansionGpu(pyrLevel[i], polyN, R[i], S(streams[i]));
if (flags_ & OPTFLOW_USE_INITIAL_FLOW)
{
cuda::resize(flowx0, curFlowX, Size(width, height), 0, 0, INTER_LINEAR, streams[0]);
cuda::resize(flowy0, curFlowY, Size(width, height), 0, 0, INTER_LINEAR, streams[1]);
curFlowX.convertTo(curFlowX, curFlowX.depth(), scale, streams[0]);
curFlowY.convertTo(curFlowY, curFlowY.depth(), scale, streams[1]);
}
else
{
curFlowX.setTo(0, streams[0]);
curFlowY.setTo(0, streams[1]);
}
}
}
streams[1].waitForCompletion();
device::optflow_farneback::updateMatricesGpu(curFlowX, curFlowY, R[0], R[1], M, S(streams[0]));
if (flags & OPTFLOW_FARNEBACK_GAUSSIAN)
{
Mat g = getGaussianKernel(winSize, winSize/2*0.3f, CV_32F);
device::optflow_farneback::setGaussianBlurKernel(g.ptr<float>(winSize/2), winSize/2);
}
for (int i = 0; i < numIters; i++)
{
if (flags & OPTFLOW_FARNEBACK_GAUSSIAN)
updateFlow_gaussianBlur(R[0], R[1], curFlowX, curFlowY, M, bufM, winSize, i < numIters-1, streams);
else
updateFlow_boxFilter(R[0], R[1], curFlowX, curFlowY, M, bufM, winSize, i < numIters-1, streams);
{
cuda::resize(prevFlowX, curFlowX, Size(width, height), 0, 0, INTER_LINEAR, streams[0]);
cuda::resize(prevFlowY, curFlowY, Size(width, height), 0, 0, INTER_LINEAR, streams[1]);
curFlowX.convertTo(curFlowX, curFlowX.depth(), 1./pyrScale_, streams[0]);
curFlowY.convertTo(curFlowY, curFlowY.depth(), 1./pyrScale_, streams[1]);
}
GpuMat M = allocMatFromBuf(5*height, width, CV_32F, M_);
GpuMat bufM = allocMatFromBuf(5*height, width, CV_32F, bufM_);
GpuMat R[2] =
{
allocMatFromBuf(5*height, width, CV_32F, R_[0]),
allocMatFromBuf(5*height, width, CV_32F, R_[1])
};
if (fastPyramids_)
{
device::optflow_farneback::polynomialExpansionGpu(pyramid0_[k], polyN_, R[0], StreamAccessor::getStream(streams[0]));
device::optflow_farneback::polynomialExpansionGpu(pyramid1_[k], polyN_, R[1], StreamAccessor::getStream(streams[1]));
}
else
{
GpuMat blurredFrame[2] =
{
allocMatFromBuf(size.height, size.width, CV_32F, blurredFrame_[0]),
allocMatFromBuf(size.height, size.width, CV_32F, blurredFrame_[1])
};
GpuMat pyrLevel[2] =
{
allocMatFromBuf(height, width, CV_32F, pyrLevel_[0]),
allocMatFromBuf(height, width, CV_32F, pyrLevel_[1])
};
Mat g = getGaussianKernel(smoothSize, sigma, CV_32F);
device::optflow_farneback::setGaussianBlurKernel(g.ptr<float>(smoothSize/2), smoothSize/2);
for (int i = 0; i < 2; i++)
{
device::optflow_farneback::gaussianBlurGpu(
frames_[i], smoothSize/2, blurredFrame[i], BORDER_REFLECT101, StreamAccessor::getStream(streams[i]));
cuda::resize(blurredFrame[i], pyrLevel[i], Size(width, height), 0.0, 0.0, INTER_LINEAR, streams[i]);
device::optflow_farneback::polynomialExpansionGpu(pyrLevel[i], polyN_, R[i], StreamAccessor::getStream(streams[i]));
}
}
streams[1].waitForCompletion();
device::optflow_farneback::updateMatricesGpu(curFlowX, curFlowY, R[0], R[1], M, StreamAccessor::getStream(streams[0]));
if (flags_ & OPTFLOW_FARNEBACK_GAUSSIAN)
{
Mat g = getGaussianKernel(winSize_, winSize_/2*0.3f, CV_32F);
device::optflow_farneback::setGaussianBlurKernel(g.ptr<float>(winSize_/2), winSize_/2);
}
for (int i = 0; i < numIters_; i++)
{
if (flags_ & OPTFLOW_FARNEBACK_GAUSSIAN)
updateFlow_gaussianBlur(R[0], R[1], curFlowX, curFlowY, M, bufM, winSize_, i < numIters_-1, streams);
else
updateFlow_boxFilter(R[0], R[1], curFlowX, curFlowY, M, bufM, winSize_, i < numIters_-1, streams);
}
prevFlowX = curFlowX;
prevFlowY = curFlowY;
}
prevFlowX = curFlowX;
prevFlowY = curFlowY;
flowx = curFlowX;
flowy = curFlowY;
if (!stream)
streams[0].waitForCompletion();
}
}
flowx = curFlowX;
flowy = curFlowY;
if (!S(s))
streams[0].waitForCompletion();
//! Factory: wraps a new FarnebackOpticalFlowImpl built from the given parameters.
//! The arguments are forwarded unchanged, in the same order, to the implementation's constructor.
Ptr<FarnebackOpticalFlow> cv::cuda::FarnebackOpticalFlow::create(int numLevels, double pyrScale, bool fastPyramids, int winSize,
int numIters, int polyN, double polySigma, int flags)
{
return makePtr<FarnebackOpticalFlowImpl>(numLevels, pyrScale, fastPyramids, winSize,
numIters, polyN, polySigma, flags);
}
#endif

View File

@ -47,37 +47,54 @@ using namespace cv::cuda;
#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
cv::cuda::PyrLKOpticalFlow::PyrLKOpticalFlow() { throw_no_cuda(); }
void cv::cuda::PyrLKOpticalFlow::sparse(const GpuMat&, const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, GpuMat*) { throw_no_cuda(); }
void cv::cuda::PyrLKOpticalFlow::dense(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, GpuMat*) { throw_no_cuda(); }
void cv::cuda::PyrLKOpticalFlow::releaseMemory() {}
// Stub for builds without CUDA: always throws via throw_no_cuda(); the return only satisfies the compiler.
Ptr<SparsePyrLKOpticalFlow> cv::cuda::SparsePyrLKOpticalFlow::create(Size, int, int, bool) { throw_no_cuda(); return Ptr<SparsePyrLKOpticalFlow>(); }
// Stub for builds without CUDA: always throws via throw_no_cuda(). The trailing return only
// satisfies the compiler and must be an (empty) Ptr of the declared return type.
Ptr<DensePyrLKOpticalFlow> cv::cuda::DensePyrLKOpticalFlow::create(Size, int, int, bool) { throw_no_cuda(); return Ptr<DensePyrLKOpticalFlow>(); }
#else /* !defined (HAVE_CUDA) */
namespace pyrlk
{
void loadConstants(int2 winSize, int iters);
void loadConstants(int2 winSize, int iters, cudaStream_t stream);
void sparse1(PtrStepSzf I, PtrStepSzf J, const float2* prevPts, float2* nextPts, uchar* status, float* err, int ptcount,
int level, dim3 block, dim3 patch, cudaStream_t stream = 0);
int level, dim3 block, dim3 patch, cudaStream_t stream);
void sparse4(PtrStepSz<float4> I, PtrStepSz<float4> J, const float2* prevPts, float2* nextPts, uchar* status, float* err, int ptcount,
int level, dim3 block, dim3 patch, cudaStream_t stream = 0);
int level, dim3 block, dim3 patch, cudaStream_t stream);
void dense(PtrStepSzb I, PtrStepSzf J, PtrStepSzf u, PtrStepSzf v, PtrStepSzf prevU, PtrStepSzf prevV,
PtrStepSzf err, int2 winSize, cudaStream_t stream = 0);
}
cv::cuda::PyrLKOpticalFlow::PyrLKOpticalFlow()
{
winSize = Size(21, 21);
maxLevel = 3;
iters = 30;
useInitialFlow = false;
PtrStepSzf err, int2 winSize, cudaStream_t stream);
}
namespace
{
void calcPatchSize(cv::Size winSize, dim3& block, dim3& patch)
//! Shared state and entry points for the pyramidal Lucas-Kanade implementations.
//! The parameter members are protected so that deriving classes can access them.
class PyrLKOpticalFlowBase
{
public:
//! Stores the four parameters in the members of the same name.
PyrLKOpticalFlowBase(Size winSize, int maxLevel, int iters, bool useInitialFlow);
//! Sparse variant: tracks prevPts from prevImg to nextImg; err is passed by pointer.
//! NOTE(review): presumably err may be null to skip error output - confirm in the definition.
void sparse(const GpuMat& prevImg, const GpuMat& nextImg, const GpuMat& prevPts, GpuMat& nextPts,
GpuMat& status, GpuMat* err, Stream& stream);
//! Dense variant: produces the two flow components u and v.
void dense(const GpuMat& prevImg, const GpuMat& nextImg, GpuMat& u, GpuMat& v, Stream& stream);
protected:
Size winSize_;
int maxLevel_;
int iters_;
bool useInitialFlow_;
private:
// NOTE(review): presumably the image pyramids of the previous and next frame - confirm in sparse()/dense().
std::vector<GpuMat> prevPyr_;
std::vector<GpuMat> nextPyr_;
};
// Copies the four algorithm parameters into the corresponding members.
// The pyramids are left empty; they are sized when a computation runs.
PyrLKOpticalFlowBase::PyrLKOpticalFlowBase(Size winSize, int maxLevel, int iters, bool useInitialFlow) :
winSize_(winSize), maxLevel_(maxLevel), iters_(iters), useInitialFlow_(useInitialFlow)
{
}
void calcPatchSize(Size winSize, dim3& block, dim3& patch)
{
if (winSize.width > 32 && winSize.width > 2 * winSize.height)
{
@ -95,156 +112,239 @@ namespace
block.z = patch.z = 1;
}
}
void cv::cuda::PyrLKOpticalFlow::sparse(const GpuMat& prevImg, const GpuMat& nextImg, const GpuMat& prevPts, GpuMat& nextPts, GpuMat& status, GpuMat* err)
{
if (prevPts.empty())
void PyrLKOpticalFlowBase::sparse(const GpuMat& prevImg, const GpuMat& nextImg, const GpuMat& prevPts, GpuMat& nextPts, GpuMat& status, GpuMat* err, Stream& stream)
{
nextPts.release();
status.release();
if (err) err->release();
return;
}
dim3 block, patch;
calcPatchSize(winSize, block, patch);
CV_Assert(prevImg.channels() == 1 || prevImg.channels() == 3 || prevImg.channels() == 4);
CV_Assert(prevImg.size() == nextImg.size() && prevImg.type() == nextImg.type());
CV_Assert(maxLevel >= 0);
CV_Assert(winSize.width > 2 && winSize.height > 2);
CV_Assert(patch.x > 0 && patch.x < 6 && patch.y > 0 && patch.y < 6);
CV_Assert(prevPts.rows == 1 && prevPts.type() == CV_32FC2);
if (useInitialFlow)
CV_Assert(nextPts.size() == prevPts.size() && nextPts.type() == CV_32FC2);
else
ensureSizeIsEnough(1, prevPts.cols, prevPts.type(), nextPts);
GpuMat temp1 = (useInitialFlow ? nextPts : prevPts).reshape(1);
GpuMat temp2 = nextPts.reshape(1);
cuda::multiply(temp1, Scalar::all(1.0 / (1 << maxLevel) / 2.0), temp2);
ensureSizeIsEnough(1, prevPts.cols, CV_8UC1, status);
status.setTo(Scalar::all(1));
if (err)
ensureSizeIsEnough(1, prevPts.cols, CV_32FC1, *err);
// build the image pyramids.
prevPyr_.resize(maxLevel + 1);
nextPyr_.resize(maxLevel + 1);
int cn = prevImg.channels();
if (cn == 1 || cn == 4)
{
prevImg.convertTo(prevPyr_[0], CV_32F);
nextImg.convertTo(nextPyr_[0], CV_32F);
}
else
{
cuda::cvtColor(prevImg, buf_, COLOR_BGR2BGRA);
buf_.convertTo(prevPyr_[0], CV_32F);
cuda::cvtColor(nextImg, buf_, COLOR_BGR2BGRA);
buf_.convertTo(nextPyr_[0], CV_32F);
}
for (int level = 1; level <= maxLevel; ++level)
{
cuda::pyrDown(prevPyr_[level - 1], prevPyr_[level]);
cuda::pyrDown(nextPyr_[level - 1], nextPyr_[level]);
}
pyrlk::loadConstants(make_int2(winSize.width, winSize.height), iters);
for (int level = maxLevel; level >= 0; level--)
{
if (cn == 1)
if (prevPts.empty())
{
pyrlk::sparse1(prevPyr_[level], nextPyr_[level],
prevPts.ptr<float2>(), nextPts.ptr<float2>(), status.ptr(), level == 0 && err ? err->ptr<float>() : 0, prevPts.cols,
level, block, patch);
nextPts.release();
status.release();
if (err) err->release();
return;
}
dim3 block, patch;
calcPatchSize(winSize_, block, patch);
CV_Assert( prevImg.channels() == 1 || prevImg.channels() == 3 || prevImg.channels() == 4 );
CV_Assert( prevImg.size() == nextImg.size() && prevImg.type() == nextImg.type() );
CV_Assert( maxLevel_ >= 0 );
CV_Assert( winSize_.width > 2 && winSize_.height > 2 );
CV_Assert( patch.x > 0 && patch.x < 6 && patch.y > 0 && patch.y < 6 );
CV_Assert( prevPts.rows == 1 && prevPts.type() == CV_32FC2 );
if (useInitialFlow_)
CV_Assert( nextPts.size() == prevPts.size() && nextPts.type() == prevPts.type() );
else
ensureSizeIsEnough(1, prevPts.cols, prevPts.type(), nextPts);
GpuMat temp1 = (useInitialFlow_ ? nextPts : prevPts).reshape(1);
GpuMat temp2 = nextPts.reshape(1);
cuda::multiply(temp1, Scalar::all(1.0 / (1 << maxLevel_) / 2.0), temp2, 1, -1, stream);
ensureSizeIsEnough(1, prevPts.cols, CV_8UC1, status);
status.setTo(Scalar::all(1), stream);
if (err)
ensureSizeIsEnough(1, prevPts.cols, CV_32FC1, *err);
// build the image pyramids.
BufferPool pool(stream);
prevPyr_.resize(maxLevel_ + 1);
nextPyr_.resize(maxLevel_ + 1);
int cn = prevImg.channels();
if (cn == 1 || cn == 4)
{
prevImg.convertTo(prevPyr_[0], CV_32F, stream);
nextImg.convertTo(nextPyr_[0], CV_32F, stream);
}
else
{
pyrlk::sparse4(prevPyr_[level], nextPyr_[level],
prevPts.ptr<float2>(), nextPts.ptr<float2>(), status.ptr(), level == 0 && err ? err->ptr<float>() : 0, prevPts.cols,
level, block, patch);
GpuMat buf = pool.getBuffer(prevImg.size(), CV_MAKE_TYPE(prevImg.depth(), 4));
cuda::cvtColor(prevImg, buf, COLOR_BGR2BGRA, 0, stream);
buf.convertTo(prevPyr_[0], CV_32F, stream);
cuda::cvtColor(nextImg, buf, COLOR_BGR2BGRA, 0, stream);
buf.convertTo(nextPyr_[0], CV_32F, stream);
}
for (int level = 1; level <= maxLevel_; ++level)
{
cuda::pyrDown(prevPyr_[level - 1], prevPyr_[level], stream);
cuda::pyrDown(nextPyr_[level - 1], nextPyr_[level], stream);
}
pyrlk::loadConstants(make_int2(winSize_.width, winSize_.height), iters_, StreamAccessor::getStream(stream));
for (int level = maxLevel_; level >= 0; level--)
{
if (cn == 1)
{
pyrlk::sparse1(prevPyr_[level], nextPyr_[level],
prevPts.ptr<float2>(), nextPts.ptr<float2>(),
status.ptr(),
level == 0 && err ? err->ptr<float>() : 0, prevPts.cols,
level, block, patch,
StreamAccessor::getStream(stream));
}
else
{
pyrlk::sparse4(prevPyr_[level], nextPyr_[level],
prevPts.ptr<float2>(), nextPts.ptr<float2>(),
status.ptr(),
level == 0 && err ? err->ptr<float>() : 0, prevPts.cols,
level, block, patch,
StreamAccessor::getStream(stream));
}
}
}
}
void cv::cuda::PyrLKOpticalFlow::dense(const GpuMat& prevImg, const GpuMat& nextImg, GpuMat& u, GpuMat& v, GpuMat* err)
{
CV_Assert(prevImg.type() == CV_8UC1);
CV_Assert(prevImg.size() == nextImg.size() && prevImg.type() == nextImg.type());
CV_Assert(maxLevel >= 0);
CV_Assert(winSize.width > 2 && winSize.height > 2);
if (err)
err->create(prevImg.size(), CV_32FC1);
// build the image pyramids.
prevPyr_.resize(maxLevel + 1);
nextPyr_.resize(maxLevel + 1);
prevPyr_[0] = prevImg;
nextImg.convertTo(nextPyr_[0], CV_32F);
for (int level = 1; level <= maxLevel; ++level)
void PyrLKOpticalFlowBase::dense(const GpuMat& prevImg, const GpuMat& nextImg, GpuMat& u, GpuMat& v, Stream& stream)
{
cuda::pyrDown(prevPyr_[level - 1], prevPyr_[level]);
cuda::pyrDown(nextPyr_[level - 1], nextPyr_[level]);
CV_Assert( prevImg.type() == CV_8UC1 );
CV_Assert( prevImg.size() == nextImg.size() && prevImg.type() == nextImg.type() );
CV_Assert( maxLevel_ >= 0 );
CV_Assert( winSize_.width > 2 && winSize_.height > 2 );
// build the image pyramids.
prevPyr_.resize(maxLevel_ + 1);
nextPyr_.resize(maxLevel_ + 1);
prevPyr_[0] = prevImg;
nextImg.convertTo(nextPyr_[0], CV_32F, stream);
for (int level = 1; level <= maxLevel_; ++level)
{
cuda::pyrDown(prevPyr_[level - 1], prevPyr_[level], stream);
cuda::pyrDown(nextPyr_[level - 1], nextPyr_[level], stream);
}
BufferPool pool(stream);
GpuMat uPyr[] = {
pool.getBuffer(prevImg.size(), CV_32FC1),
pool.getBuffer(prevImg.size(), CV_32FC1),
};
GpuMat vPyr[] = {
pool.getBuffer(prevImg.size(), CV_32FC1),
pool.getBuffer(prevImg.size(), CV_32FC1),
};
uPyr[0].setTo(Scalar::all(0), stream);
vPyr[0].setTo(Scalar::all(0), stream);
uPyr[1].setTo(Scalar::all(0), stream);
vPyr[1].setTo(Scalar::all(0), stream);
int2 winSize2i = make_int2(winSize_.width, winSize_.height);
pyrlk::loadConstants(winSize2i, iters_, StreamAccessor::getStream(stream));
int idx = 0;
for (int level = maxLevel_; level >= 0; level--)
{
int idx2 = (idx + 1) & 1;
pyrlk::dense(prevPyr_[level], nextPyr_[level],
uPyr[idx], vPyr[idx], uPyr[idx2], vPyr[idx2],
PtrStepSzf(), winSize2i,
StreamAccessor::getStream(stream));
if (level > 0)
idx = idx2;
}
uPyr[idx].copyTo(u, stream);
vPyr[idx].copyTo(v, stream);
}
ensureSizeIsEnough(prevImg.size(), CV_32FC1, uPyr_[0]);
ensureSizeIsEnough(prevImg.size(), CV_32FC1, vPyr_[0]);
ensureSizeIsEnough(prevImg.size(), CV_32FC1, uPyr_[1]);
ensureSizeIsEnough(prevImg.size(), CV_32FC1, vPyr_[1]);
uPyr_[0].setTo(Scalar::all(0));
vPyr_[0].setTo(Scalar::all(0));
uPyr_[1].setTo(Scalar::all(0));
vPyr_[1].setTo(Scalar::all(0));
int2 winSize2i = make_int2(winSize.width, winSize.height);
pyrlk::loadConstants(winSize2i, iters);
PtrStepSzf derr = err ? *err : PtrStepSzf();
int idx = 0;
for (int level = maxLevel; level >= 0; level--)
class SparsePyrLKOpticalFlowImpl : public SparsePyrLKOpticalFlow, private PyrLKOpticalFlowBase
{
int idx2 = (idx + 1) & 1;
public:
SparsePyrLKOpticalFlowImpl(Size winSize, int maxLevel, int iters, bool useInitialFlow) :
PyrLKOpticalFlowBase(winSize, maxLevel, iters, useInitialFlow)
{
}
pyrlk::dense(prevPyr_[level], nextPyr_[level], uPyr_[idx], vPyr_[idx], uPyr_[idx2], vPyr_[idx2],
level == 0 ? derr : PtrStepSzf(), winSize2i);
virtual Size getWinSize() const { return winSize_; }
virtual void setWinSize(Size winSize) { winSize_ = winSize; }
if (level > 0)
idx = idx2;
}
virtual int getMaxLevel() const { return maxLevel_; }
virtual void setMaxLevel(int maxLevel) { maxLevel_ = maxLevel; }
uPyr_[idx].copyTo(u);
vPyr_[idx].copyTo(v);
virtual int getNumIters() const { return iters_; }
virtual void setNumIters(int iters) { iters_ = iters; }
virtual bool getUseInitialFlow() const { return useInitialFlow_; }
virtual void setUseInitialFlow(bool useInitialFlow) { useInitialFlow_ = useInitialFlow; }
virtual void calc(InputArray _prevImg, InputArray _nextImg,
InputArray _prevPts, InputOutputArray _nextPts,
OutputArray _status,
OutputArray _err,
Stream& stream)
{
const GpuMat prevImg = _prevImg.getGpuMat();
const GpuMat nextImg = _nextImg.getGpuMat();
const GpuMat prevPts = _prevPts.getGpuMat();
GpuMat& nextPts = _nextPts.getGpuMatRef();
GpuMat& status = _status.getGpuMatRef();
GpuMat* err = _err.needed() ? &(_err.getGpuMatRef()) : NULL;
sparse(prevImg, nextImg, prevPts, nextPts, status, err, stream);
}
};
class DensePyrLKOpticalFlowImpl : public DensePyrLKOpticalFlow, private PyrLKOpticalFlowBase
{
public:
DensePyrLKOpticalFlowImpl(Size winSize, int maxLevel, int iters, bool useInitialFlow) :
PyrLKOpticalFlowBase(winSize, maxLevel, iters, useInitialFlow)
{
}
virtual Size getWinSize() const { return winSize_; }
virtual void setWinSize(Size winSize) { winSize_ = winSize; }
virtual int getMaxLevel() const { return maxLevel_; }
virtual void setMaxLevel(int maxLevel) { maxLevel_ = maxLevel; }
virtual int getNumIters() const { return iters_; }
virtual void setNumIters(int iters) { iters_ = iters; }
virtual bool getUseInitialFlow() const { return useInitialFlow_; }
virtual void setUseInitialFlow(bool useInitialFlow) { useInitialFlow_ = useInitialFlow; }
virtual void calc(InputArray _prevImg, InputArray _nextImg, InputOutputArray _flow, Stream& stream)
{
const GpuMat prevImg = _prevImg.getGpuMat();
const GpuMat nextImg = _nextImg.getGpuMat();
BufferPool pool(stream);
GpuMat u = pool.getBuffer(prevImg.size(), CV_32FC1);
GpuMat v = pool.getBuffer(prevImg.size(), CV_32FC1);
dense(prevImg, nextImg, u, v, stream);
GpuMat flows[] = {u, v};
cuda::merge(flows, 2, _flow, stream);
}
};
}
void cv::cuda::PyrLKOpticalFlow::releaseMemory()
Ptr<SparsePyrLKOpticalFlow> cv::cuda::SparsePyrLKOpticalFlow::create(Size winSize, int maxLevel, int iters, bool useInitialFlow)
{
prevPyr_.clear();
nextPyr_.clear();
return makePtr<SparsePyrLKOpticalFlowImpl>(winSize, maxLevel, iters, useInitialFlow);
}
buf_.release();
uPyr_[0].release();
vPyr_[0].release();
uPyr_[1].release();
vPyr_[1].release();
// Factory for the dense PyrLK optical flow: builds the CUDA-backed
// implementation with the given parameters and hands it back through the
// public interface type.
Ptr<DensePyrLKOpticalFlow> cv::cuda::DensePyrLKOpticalFlow::create(Size winSize, int maxLevel, int iters, bool useInitialFlow)
{
    Ptr<DensePyrLKOpticalFlowImpl> impl =
        makePtr<DensePyrLKOpticalFlowImpl>(winSize, maxLevel, iters, useInitialFlow);
    return impl;
}
#endif /* !defined (HAVE_CUDA) */

View File

@ -44,257 +44,338 @@
#if !defined HAVE_CUDA || defined(CUDA_DISABLER)
cv::cuda::OpticalFlowDual_TVL1_CUDA::OpticalFlowDual_TVL1_CUDA() { throw_no_cuda(); }
void cv::cuda::OpticalFlowDual_TVL1_CUDA::operator ()(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&) { throw_no_cuda(); }
void cv::cuda::OpticalFlowDual_TVL1_CUDA::collectGarbage() {}
void cv::cuda::OpticalFlowDual_TVL1_CUDA::procOneScale(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, GpuMat&) { throw_no_cuda(); }
Ptr<OpticalFlowDual_TVL1> cv::cuda::OpticalFlowDual_TVL1::create(double, double, double, int, int, double, int, double, double, bool) { throw_no_cuda(); return Ptr<OpticalFlowDual_TVL1>(); }
#else
using namespace cv;
using namespace cv::cuda;
cv::cuda::OpticalFlowDual_TVL1_CUDA::OpticalFlowDual_TVL1_CUDA()
{
tau = 0.25;
lambda = 0.15;
theta = 0.3;
nscales = 5;
warps = 5;
epsilon = 0.01;
iterations = 300;
scaleStep = 0.8;
gamma = 0.0;
useInitialFlow = false;
}
void cv::cuda::OpticalFlowDual_TVL1_CUDA::operator ()(const GpuMat& I0, const GpuMat& I1, GpuMat& flowx, GpuMat& flowy)
{
CV_Assert( I0.type() == CV_8UC1 || I0.type() == CV_32FC1 );
CV_Assert( I0.size() == I1.size() );
CV_Assert( I0.type() == I1.type() );
CV_Assert( !useInitialFlow || (flowx.size() == I0.size() && flowx.type() == CV_32FC1 && flowy.size() == flowx.size() && flowy.type() == flowx.type()) );
CV_Assert( nscales > 0 );
// allocate memory for the pyramid structure
I0s.resize(nscales);
I1s.resize(nscales);
u1s.resize(nscales);
u2s.resize(nscales);
u3s.resize(nscales);
I0.convertTo(I0s[0], CV_32F, I0.depth() == CV_8U ? 1.0 : 255.0);
I1.convertTo(I1s[0], CV_32F, I1.depth() == CV_8U ? 1.0 : 255.0);
if (!useInitialFlow)
{
flowx.create(I0.size(), CV_32FC1);
flowy.create(I0.size(), CV_32FC1);
}
u1s[0] = flowx;
u2s[0] = flowy;
if (gamma)
u3s[0].create(I0.size(), CV_32FC1);
I1x_buf.create(I0.size(), CV_32FC1);
I1y_buf.create(I0.size(), CV_32FC1);
I1w_buf.create(I0.size(), CV_32FC1);
I1wx_buf.create(I0.size(), CV_32FC1);
I1wy_buf.create(I0.size(), CV_32FC1);
grad_buf.create(I0.size(), CV_32FC1);
rho_c_buf.create(I0.size(), CV_32FC1);
p11_buf.create(I0.size(), CV_32FC1);
p12_buf.create(I0.size(), CV_32FC1);
p21_buf.create(I0.size(), CV_32FC1);
p22_buf.create(I0.size(), CV_32FC1);
if (gamma)
{
p31_buf.create(I0.size(), CV_32FC1);
p32_buf.create(I0.size(), CV_32FC1);
}
diff_buf.create(I0.size(), CV_32FC1);
// create the scales
for (int s = 1; s < nscales; ++s)
{
cuda::resize(I0s[s-1], I0s[s], Size(), scaleStep, scaleStep);
cuda::resize(I1s[s-1], I1s[s], Size(), scaleStep, scaleStep);
if (I0s[s].cols < 16 || I0s[s].rows < 16)
{
nscales = s;
break;
}
if (useInitialFlow)
{
cuda::resize(u1s[s-1], u1s[s], Size(), scaleStep, scaleStep);
cuda::resize(u2s[s-1], u2s[s], Size(), scaleStep, scaleStep);
cuda::multiply(u1s[s], Scalar::all(scaleStep), u1s[s]);
cuda::multiply(u2s[s], Scalar::all(scaleStep), u2s[s]);
}
else
{
u1s[s].create(I0s[s].size(), CV_32FC1);
u2s[s].create(I0s[s].size(), CV_32FC1);
}
if (gamma)
u3s[s].create(I0s[s].size(), CV_32FC1);
}
if (!useInitialFlow)
{
u1s[nscales-1].setTo(Scalar::all(0));
u2s[nscales-1].setTo(Scalar::all(0));
}
if (gamma)
u3s[nscales - 1].setTo(Scalar::all(0));
// pyramidal structure for computing the optical flow
for (int s = nscales - 1; s >= 0; --s)
{
// compute the optical flow at the current scale
procOneScale(I0s[s], I1s[s], u1s[s], u2s[s], u3s[s]);
// if this was the last scale, finish now
if (s == 0)
break;
// otherwise, upsample the optical flow
// zoom the optical flow for the next finer scale
cuda::resize(u1s[s], u1s[s - 1], I0s[s - 1].size());
cuda::resize(u2s[s], u2s[s - 1], I0s[s - 1].size());
if (gamma)
cuda::resize(u3s[s], u3s[s - 1], I0s[s - 1].size());
// scale the optical flow with the appropriate zoom factor
cuda::multiply(u1s[s - 1], Scalar::all(1/scaleStep), u1s[s - 1]);
cuda::multiply(u2s[s - 1], Scalar::all(1/scaleStep), u2s[s - 1]);
}
}
namespace tvl1flow
{
void centeredGradient(PtrStepSzf src, PtrStepSzf dx, PtrStepSzf dy);
void warpBackward(PtrStepSzf I0, PtrStepSzf I1, PtrStepSzf I1x, PtrStepSzf I1y, PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf I1w, PtrStepSzf I1wx, PtrStepSzf I1wy, PtrStepSzf grad, PtrStepSzf rho);
void centeredGradient(PtrStepSzf src, PtrStepSzf dx, PtrStepSzf dy, cudaStream_t stream);
void warpBackward(PtrStepSzf I0, PtrStepSzf I1, PtrStepSzf I1x, PtrStepSzf I1y,
PtrStepSzf u1, PtrStepSzf u2,
PtrStepSzf I1w, PtrStepSzf I1wx, PtrStepSzf I1wy,
PtrStepSzf grad, PtrStepSzf rho,
cudaStream_t stream);
void estimateU(PtrStepSzf I1wx, PtrStepSzf I1wy,
PtrStepSzf grad, PtrStepSzf rho_c,
PtrStepSzf p11, PtrStepSzf p12, PtrStepSzf p21, PtrStepSzf p22, PtrStepSzf p31, PtrStepSzf p32,
PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf u3, PtrStepSzf error,
float l_t, float theta, float gamma, bool calcError);
void estimateDualVariables(PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf u3, PtrStepSzf p11, PtrStepSzf p12, PtrStepSzf p21, PtrStepSzf p22, PtrStepSzf p31, PtrStepSzf p32, float taut, const float gamma);
float l_t, float theta, float gamma, bool calcError,
cudaStream_t stream);
void estimateDualVariables(PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf u3,
PtrStepSzf p11, PtrStepSzf p12, PtrStepSzf p21, PtrStepSzf p22, PtrStepSzf p31, PtrStepSzf p32,
float taut, float gamma,
cudaStream_t stream);
}
void cv::cuda::OpticalFlowDual_TVL1_CUDA::procOneScale(const GpuMat& I0, const GpuMat& I1, GpuMat& u1, GpuMat& u2, GpuMat& u3)
namespace
{
using namespace tvl1flow;
const double scaledEpsilon = epsilon * epsilon * I0.size().area();
CV_DbgAssert( I1.size() == I0.size() );
CV_DbgAssert( I1.type() == I0.type() );
CV_DbgAssert( u1.size() == I0.size() );
CV_DbgAssert( u2.size() == u1.size() );
GpuMat I1x = I1x_buf(Rect(0, 0, I0.cols, I0.rows));
GpuMat I1y = I1y_buf(Rect(0, 0, I0.cols, I0.rows));
centeredGradient(I1, I1x, I1y);
GpuMat I1w = I1w_buf(Rect(0, 0, I0.cols, I0.rows));
GpuMat I1wx = I1wx_buf(Rect(0, 0, I0.cols, I0.rows));
GpuMat I1wy = I1wy_buf(Rect(0, 0, I0.cols, I0.rows));
GpuMat grad = grad_buf(Rect(0, 0, I0.cols, I0.rows));
GpuMat rho_c = rho_c_buf(Rect(0, 0, I0.cols, I0.rows));
GpuMat p11 = p11_buf(Rect(0, 0, I0.cols, I0.rows));
GpuMat p12 = p12_buf(Rect(0, 0, I0.cols, I0.rows));
GpuMat p21 = p21_buf(Rect(0, 0, I0.cols, I0.rows));
GpuMat p22 = p22_buf(Rect(0, 0, I0.cols, I0.rows));
GpuMat p31, p32;
if (gamma)
class OpticalFlowDual_TVL1_Impl : public OpticalFlowDual_TVL1
{
p31 = p31_buf(Rect(0, 0, I0.cols, I0.rows));
p32 = p32_buf(Rect(0, 0, I0.cols, I0.rows));
}
p11.setTo(Scalar::all(0));
p12.setTo(Scalar::all(0));
p21.setTo(Scalar::all(0));
p22.setTo(Scalar::all(0));
if (gamma)
{
p31.setTo(Scalar::all(0));
p32.setTo(Scalar::all(0));
}
GpuMat diff = diff_buf(Rect(0, 0, I0.cols, I0.rows));
const float l_t = static_cast<float>(lambda * theta);
const float taut = static_cast<float>(tau / theta);
for (int warpings = 0; warpings < warps; ++warpings)
{
warpBackward(I0, I1, I1x, I1y, u1, u2, I1w, I1wx, I1wy, grad, rho_c);
double error = std::numeric_limits<double>::max();
double prevError = 0.0;
for (int n = 0; error > scaledEpsilon && n < iterations; ++n)
public:
OpticalFlowDual_TVL1_Impl(double tau, double lambda, double theta, int nscales, int warps, double epsilon,
int iterations, double scaleStep, double gamma, bool useInitialFlow) :
tau_(tau), lambda_(lambda), gamma_(gamma), theta_(theta), nscales_(nscales), warps_(warps),
epsilon_(epsilon), iterations_(iterations), scaleStep_(scaleStep), useInitialFlow_(useInitialFlow)
{
// some tweaks to make sum operation less frequently
bool calcError = (epsilon > 0) && (n & 0x1) && (prevError < scaledEpsilon);
cv::Mat m1(u3);
estimateU(I1wx, I1wy, grad, rho_c, p11, p12, p21, p22, p31, p32, u1, u2, u3, diff, l_t, static_cast<float>(theta), gamma, calcError);
if (calcError)
}
virtual double getTau() const { return tau_; }
virtual void setTau(double tau) { tau_ = tau; }
virtual double getLambda() const { return lambda_; }
virtual void setLambda(double lambda) { lambda_ = lambda; }
virtual double getGamma() const { return gamma_; }
virtual void setGamma(double gamma) { gamma_ = gamma; }
virtual double getTheta() const { return theta_; }
virtual void setTheta(double theta) { theta_ = theta; }
virtual int getNumScales() const { return nscales_; }
virtual void setNumScales(int nscales) { nscales_ = nscales; }
virtual int getNumWarps() const { return warps_; }
virtual void setNumWarps(int warps) { warps_ = warps; }
virtual double getEpsilon() const { return epsilon_; }
virtual void setEpsilon(double epsilon) { epsilon_ = epsilon; }
virtual int getNumIterations() const { return iterations_; }
virtual void setNumIterations(int iterations) { iterations_ = iterations; }
virtual double getScaleStep() const { return scaleStep_; }
virtual void setScaleStep(double scaleStep) { scaleStep_ = scaleStep; }
virtual bool getUseInitialFlow() const { return useInitialFlow_; }
virtual void setUseInitialFlow(bool useInitialFlow) { useInitialFlow_ = useInitialFlow; }
virtual void calc(InputArray I0, InputArray I1, InputOutputArray flow, Stream& stream);
private:
double tau_;
double lambda_;
double gamma_;
double theta_;
int nscales_;
int warps_;
double epsilon_;
int iterations_;
double scaleStep_;
bool useInitialFlow_;
private:
void calcImpl(const GpuMat& I0, const GpuMat& I1, GpuMat& flowx, GpuMat& flowy, Stream& stream);
void procOneScale(const GpuMat& I0, const GpuMat& I1, GpuMat& u1, GpuMat& u2, GpuMat& u3, Stream& stream);
std::vector<GpuMat> I0s;
std::vector<GpuMat> I1s;
std::vector<GpuMat> u1s;
std::vector<GpuMat> u2s;
std::vector<GpuMat> u3s;
GpuMat I1x_buf;
GpuMat I1y_buf;
GpuMat I1w_buf;
GpuMat I1wx_buf;
GpuMat I1wy_buf;
GpuMat grad_buf;
GpuMat rho_c_buf;
GpuMat p11_buf;
GpuMat p12_buf;
GpuMat p21_buf;
GpuMat p22_buf;
GpuMat p31_buf;
GpuMat p32_buf;
GpuMat diff_buf;
GpuMat norm_buf;
};
// Public entry point: computes the TV-L1 flow from _frame0 to _frame1 and
// stores it in _flow as a two-channel CV_32F image (channel 0 = x component,
// channel 1 = y component). All work is enqueued on the given stream.
//
// When useInitialFlow_ is set, _flow is also an input: it must already hold a
// CV_32FC2 field of the same size as the frames, which is used as the starting
// estimate. calcImpl() reads flowx/flowy as that estimate, so they have to be
// seeded from _flow here; freshly obtained pool buffers hold arbitrary data.
void OpticalFlowDual_TVL1_Impl::calc(InputArray _frame0, InputArray _frame1, InputOutputArray _flow, Stream& stream)
{
    const GpuMat frame0 = _frame0.getGpuMat();
    const GpuMat frame1 = _frame1.getGpuMat();

    BufferPool pool(stream);
    GpuMat flowx = pool.getBuffer(frame0.size(), CV_32FC1);
    GpuMat flowy = pool.getBuffer(frame0.size(), CV_32FC1);

    if (useInitialFlow_)
    {
        const GpuMat initialFlow = _flow.getGpuMat();
        CV_Assert( initialFlow.size() == frame0.size() && initialFlow.type() == CV_32FC2 );

        // The plane headers share memory with flowx/flowy, and the sizes/types
        // already match, so split() fills the pool buffers in place.
        GpuMat planes[] = {flowx, flowy};
        cuda::split(initialFlow, planes, stream);
    }

    calcImpl(frame0, frame1, flowx, flowy, stream);

    // Interleave the two component planes into the caller-visible flow field.
    GpuMat flows[] = {flowx, flowy};
    cuda::merge(flows, 2, _flow, stream);
}
void OpticalFlowDual_TVL1_Impl::calcImpl(const GpuMat& I0, const GpuMat& I1, GpuMat& flowx, GpuMat& flowy, Stream& stream)
{
CV_Assert( I0.type() == CV_8UC1 || I0.type() == CV_32FC1 );
CV_Assert( I0.size() == I1.size() );
CV_Assert( I0.type() == I1.type() );
CV_Assert( !useInitialFlow_ || (flowx.size() == I0.size() && flowx.type() == CV_32FC1 && flowy.size() == flowx.size() && flowy.type() == flowx.type()) );
CV_Assert( nscales_ > 0 );
// allocate memory for the pyramid structure
I0s.resize(nscales_);
I1s.resize(nscales_);
u1s.resize(nscales_);
u2s.resize(nscales_);
u3s.resize(nscales_);
I0.convertTo(I0s[0], CV_32F, I0.depth() == CV_8U ? 1.0 : 255.0, stream);
I1.convertTo(I1s[0], CV_32F, I1.depth() == CV_8U ? 1.0 : 255.0, stream);
if (!useInitialFlow_)
{
flowx.create(I0.size(), CV_32FC1);
flowy.create(I0.size(), CV_32FC1);
}
u1s[0] = flowx;
u2s[0] = flowy;
if (gamma_)
{
u3s[0].create(I0.size(), CV_32FC1);
}
I1x_buf.create(I0.size(), CV_32FC1);
I1y_buf.create(I0.size(), CV_32FC1);
I1w_buf.create(I0.size(), CV_32FC1);
I1wx_buf.create(I0.size(), CV_32FC1);
I1wy_buf.create(I0.size(), CV_32FC1);
grad_buf.create(I0.size(), CV_32FC1);
rho_c_buf.create(I0.size(), CV_32FC1);
p11_buf.create(I0.size(), CV_32FC1);
p12_buf.create(I0.size(), CV_32FC1);
p21_buf.create(I0.size(), CV_32FC1);
p22_buf.create(I0.size(), CV_32FC1);
if (gamma_)
{
p31_buf.create(I0.size(), CV_32FC1);
p32_buf.create(I0.size(), CV_32FC1);
}
diff_buf.create(I0.size(), CV_32FC1);
// create the scales
for (int s = 1; s < nscales_; ++s)
{
cuda::resize(I0s[s-1], I0s[s], Size(), scaleStep_, scaleStep_, INTER_LINEAR, stream);
cuda::resize(I1s[s-1], I1s[s], Size(), scaleStep_, scaleStep_, INTER_LINEAR, stream);
if (I0s[s].cols < 16 || I0s[s].rows < 16)
{
error = cuda::sum(diff, norm_buf)[0];
prevError = error;
nscales_ = s;
break;
}
if (useInitialFlow_)
{
cuda::resize(u1s[s-1], u1s[s], Size(), scaleStep_, scaleStep_, INTER_LINEAR, stream);
cuda::resize(u2s[s-1], u2s[s], Size(), scaleStep_, scaleStep_, INTER_LINEAR, stream);
cuda::multiply(u1s[s], Scalar::all(scaleStep_), u1s[s], 1, -1, stream);
cuda::multiply(u2s[s], Scalar::all(scaleStep_), u2s[s], 1, -1, stream);
}
else
{
error = std::numeric_limits<double>::max();
prevError -= scaledEpsilon;
u1s[s].create(I0s[s].size(), CV_32FC1);
u2s[s].create(I0s[s].size(), CV_32FC1);
}
if (gamma_)
{
u3s[s].create(I0s[s].size(), CV_32FC1);
}
}
if (!useInitialFlow_)
{
u1s[nscales_-1].setTo(Scalar::all(0), stream);
u2s[nscales_-1].setTo(Scalar::all(0), stream);
}
if (gamma_)
{
u3s[nscales_ - 1].setTo(Scalar::all(0), stream);
}
// pyramidal structure for computing the optical flow
for (int s = nscales_ - 1; s >= 0; --s)
{
// compute the optical flow at the current scale
procOneScale(I0s[s], I1s[s], u1s[s], u2s[s], u3s[s], stream);
// if this was the last scale, finish now
if (s == 0)
break;
// otherwise, upsample the optical flow
// zoom the optical flow for the next finer scale
cuda::resize(u1s[s], u1s[s - 1], I0s[s - 1].size(), 0, 0, INTER_LINEAR, stream);
cuda::resize(u2s[s], u2s[s - 1], I0s[s - 1].size(), 0, 0, INTER_LINEAR, stream);
if (gamma_)
{
cuda::resize(u3s[s], u3s[s - 1], I0s[s - 1].size(), 0, 0, INTER_LINEAR, stream);
}
estimateDualVariables(u1, u2, u3, p11, p12, p21, p22, p31, p32, taut, gamma);
// scale the optical flow with the appropriate zoom factor
cuda::multiply(u1s[s - 1], Scalar::all(1/scaleStep_), u1s[s - 1], 1, -1, stream);
cuda::multiply(u2s[s - 1], Scalar::all(1/scaleStep_), u2s[s - 1], 1, -1, stream);
}
}
// Refines the flow (u1, u2, and u3 when gamma_ is non-zero) for one pyramid
// scale. It performs warps_ warping passes; each pass runs up to iterations_
// inner iterations of estimateU / estimateDualVariables and may stop early
// once the summed error drops to scaledEpsilon or below.
// I0/I1 are the two images at this scale; u1/u2/u3 are updated in place.
// Kernels are launched on the raw CUDA stream taken from _stream.
void OpticalFlowDual_TVL1_Impl::procOneScale(const GpuMat& I0, const GpuMat& I1, GpuMat& u1, GpuMat& u2, GpuMat& u3, Stream& _stream)
{
using namespace tvl1flow;
cudaStream_t stream = StreamAccessor::getStream(_stream);
// Convergence threshold scaled by the number of pixels at this scale.
const double scaledEpsilon = epsilon_ * epsilon_ * I0.size().area();
CV_DbgAssert( I1.size() == I0.size() );
CV_DbgAssert( I1.type() == I0.type() );
CV_DbgAssert( u1.size() == I0.size() );
CV_DbgAssert( u2.size() == u1.size() );
// All work buffers below are views of the member buffers restricted to the
// top-left I0.cols x I0.rows region, so the same allocations serve every scale.
GpuMat I1x = I1x_buf(Rect(0, 0, I0.cols, I0.rows));
GpuMat I1y = I1y_buf(Rect(0, 0, I0.cols, I0.rows));
centeredGradient(I1, I1x, I1y, stream);
GpuMat I1w = I1w_buf(Rect(0, 0, I0.cols, I0.rows));
GpuMat I1wx = I1wx_buf(Rect(0, 0, I0.cols, I0.rows));
GpuMat I1wy = I1wy_buf(Rect(0, 0, I0.cols, I0.rows));
GpuMat grad = grad_buf(Rect(0, 0, I0.cols, I0.rows));
GpuMat rho_c = rho_c_buf(Rect(0, 0, I0.cols, I0.rows));
GpuMat p11 = p11_buf(Rect(0, 0, I0.cols, I0.rows));
GpuMat p12 = p12_buf(Rect(0, 0, I0.cols, I0.rows));
GpuMat p21 = p21_buf(Rect(0, 0, I0.cols, I0.rows));
GpuMat p22 = p22_buf(Rect(0, 0, I0.cols, I0.rows));
// p31/p32 stay empty unless gamma_ is non-zero; they are still passed to the
// kernels below in either case.
GpuMat p31, p32;
if (gamma_)
{
p31 = p31_buf(Rect(0, 0, I0.cols, I0.rows));
p32 = p32_buf(Rect(0, 0, I0.cols, I0.rows));
}
// The dual variables start from zero at every scale.
p11.setTo(Scalar::all(0), _stream);
p12.setTo(Scalar::all(0), _stream);
p21.setTo(Scalar::all(0), _stream);
p22.setTo(Scalar::all(0), _stream);
if (gamma_)
{
p31.setTo(Scalar::all(0), _stream);
p32.setTo(Scalar::all(0), _stream);
}
GpuMat diff = diff_buf(Rect(0, 0, I0.cols, I0.rows));
const float l_t = static_cast<float>(lambda_ * theta_);
const float taut = static_cast<float>(tau_ / theta_);
for (int warpings = 0; warpings < warps_; ++warpings)
{
warpBackward(I0, I1, I1x, I1y, u1, u2, I1w, I1wx, I1wy, grad, rho_c, stream);
// error starts at max so the inner loop always runs at least once
// (provided iterations_ > 0).
double error = std::numeric_limits<double>::max();
double prevError = 0.0;
for (int n = 0; error > scaledEpsilon && n < iterations_; ++n)
{
// some tweaks to make sum operation less frequently
// The error is only evaluated on odd iterations, when epsilon_ is positive,
// and while prevError is below scaledEpsilon.
bool calcError = (epsilon_ > 0) && (n & 0x1) && (prevError < scaledEpsilon);
estimateU(I1wx, I1wy, grad, rho_c, p11, p12, p21, p22, p31, p32, u1, u2, u3, diff, l_t, static_cast<float>(theta_), gamma_, calcError, stream);
if (calcError)
{
// Block until everything queued on _stream has finished before reducing diff
// on the host side.
// NOTE(review): presumably needed because this cuda::sum overload is not
// enqueued on _stream - confirm.
_stream.waitForCompletion();
error = cuda::sum(diff, norm_buf)[0];
prevError = error;
}
else
{
// No fresh measurement: keep the loop running and lower prevError by one
// threshold step so that a later iteration evaluates the error again.
error = std::numeric_limits<double>::max();
prevError -= scaledEpsilon;
}
estimateDualVariables(u1, u2, u3, p11, p12, p21, p22, p31, p32, taut, gamma_, stream);
}
}
}
}
void cv::cuda::OpticalFlowDual_TVL1_CUDA::collectGarbage()
Ptr<OpticalFlowDual_TVL1> cv::cuda::OpticalFlowDual_TVL1::create(
double tau, double lambda, double theta, int nscales, int warps,
double epsilon, int iterations, double scaleStep, double gamma, bool useInitialFlow)
{
I0s.clear();
I1s.clear();
u1s.clear();
u2s.clear();
u3s.clear();
I1x_buf.release();
I1y_buf.release();
I1w_buf.release();
I1wx_buf.release();
I1wy_buf.release();
grad_buf.release();
rho_c_buf.release();
p11_buf.release();
p12_buf.release();
p21_buf.release();
p22_buf.release();
if (gamma)
{
p31_buf.release();
p32_buf.release();
}
diff_buf.release();
norm_buf.release();
return makePtr<OpticalFlowDual_TVL1_Impl>(tau, lambda, theta, nscales, warps,
epsilon, iterations, scaleStep, gamma, useInitialFlow);
}
#endif // !defined HAVE_CUDA || defined(CUDA_DISABLER)

View File

@ -71,12 +71,18 @@ CUDA_TEST_P(BroxOpticalFlow, Regression)
cv::Mat frame1 = readImageType("opticalflow/frame1.png", CV_32FC1);
ASSERT_FALSE(frame1.empty());
cv::cuda::BroxOpticalFlow brox(0.197f /*alpha*/, 50.0f /*gamma*/, 0.8f /*scale_factor*/,
10 /*inner_iterations*/, 77 /*outer_iterations*/, 10 /*solver_iterations*/);
cv::Ptr<cv::cuda::BroxOpticalFlow> brox =
cv::cuda::BroxOpticalFlow::create(0.197 /*alpha*/, 50.0 /*gamma*/, 0.8 /*scale_factor*/,
10 /*inner_iterations*/, 77 /*outer_iterations*/, 10 /*solver_iterations*/);
cv::cuda::GpuMat u;
cv::cuda::GpuMat v;
brox(loadMat(frame0), loadMat(frame1), u, v);
cv::cuda::GpuMat flow;
brox->calc(loadMat(frame0), loadMat(frame1), flow);
cv::cuda::GpuMat flows[2];
cv::cuda::split(flow, flows);
cv::cuda::GpuMat u = flows[0];
cv::cuda::GpuMat v = flows[1];
std::string fname(cvtest::TS::ptr()->get_data_path());
if (devInfo.majorVersion() >= 2)
@ -133,12 +139,18 @@ CUDA_TEST_P(BroxOpticalFlow, OpticalFlowNan)
cv::resize(frame0, r_frame0, cv::Size(1380,1000));
cv::resize(frame1, r_frame1, cv::Size(1380,1000));
cv::cuda::BroxOpticalFlow brox(0.197f /*alpha*/, 50.0f /*gamma*/, 0.8f /*scale_factor*/,
5 /*inner_iterations*/, 150 /*outer_iterations*/, 10 /*solver_iterations*/);
cv::Ptr<cv::cuda::BroxOpticalFlow> brox =
cv::cuda::BroxOpticalFlow::create(0.197 /*alpha*/, 50.0 /*gamma*/, 0.8 /*scale_factor*/,
10 /*inner_iterations*/, 77 /*outer_iterations*/, 10 /*solver_iterations*/);
cv::cuda::GpuMat u;
cv::cuda::GpuMat v;
brox(loadMat(r_frame0), loadMat(r_frame1), u, v);
cv::cuda::GpuMat flow;
// Feed the resized 1380x1000 frames prepared above (r_frame0 / r_frame1), as
// the replaced brox(...) call did; passing the original frames would leave the
// resized images unused and change what this test exercises.
brox->calc(loadMat(r_frame0), loadMat(r_frame1), flow);
cv::cuda::GpuMat flows[2];
cv::cuda::split(flow, flows);
cv::cuda::GpuMat u = flows[0];
cv::cuda::GpuMat v = flows[1];
cv::Mat h_u, h_v;
u.download(h_u);
@ -193,11 +205,12 @@ CUDA_TEST_P(PyrLKOpticalFlow, Sparse)
cv::Mat pts_mat(1, (int) pts.size(), CV_32FC2, (void*) &pts[0]);
d_pts.upload(pts_mat);
cv::cuda::PyrLKOpticalFlow pyrLK;
cv::Ptr<cv::cuda::SparsePyrLKOpticalFlow> pyrLK =
cv::cuda::SparsePyrLKOpticalFlow::create();
cv::cuda::GpuMat d_nextPts;
cv::cuda::GpuMat d_status;
pyrLK.sparse(loadMat(frame0), loadMat(frame1), d_pts, d_nextPts, d_status);
pyrLK->calc(loadMat(frame0), loadMat(frame1), d_pts, d_nextPts, d_status);
std::vector<cv::Point2f> nextPts(d_nextPts.cols);
cv::Mat nextPts_mat(1, d_nextPts.cols, CV_32FC2, (void*) &nextPts[0]);
@ -285,34 +298,30 @@ CUDA_TEST_P(FarnebackOpticalFlow, Accuracy)
double polySigma = polyN <= 5 ? 1.1 : 1.5;
cv::cuda::FarnebackOpticalFlow farn;
farn.pyrScale = pyrScale;
farn.polyN = polyN;
farn.polySigma = polySigma;
farn.flags = flags;
cv::Ptr<cv::cuda::FarnebackOpticalFlow> farn =
cv::cuda::FarnebackOpticalFlow::create();
farn->setPyrScale(pyrScale);
farn->setPolyN(polyN);
farn->setPolySigma(polySigma);
farn->setFlags(flags);
cv::cuda::GpuMat d_flowx, d_flowy;
farn(loadMat(frame0), loadMat(frame1), d_flowx, d_flowy);
cv::cuda::GpuMat d_flow;
farn->calc(loadMat(frame0), loadMat(frame1), d_flow);
cv::Mat flow;
if (useInitFlow)
{
cv::Mat flowxy[] = {cv::Mat(d_flowx), cv::Mat(d_flowy)};
cv::merge(flowxy, 2, flow);
d_flow.download(flow);
farn.flags |= cv::OPTFLOW_USE_INITIAL_FLOW;
farn(loadMat(frame0), loadMat(frame1), d_flowx, d_flowy);
farn->setFlags(farn->getFlags() | cv::OPTFLOW_USE_INITIAL_FLOW);
farn->calc(loadMat(frame0), loadMat(frame1), d_flow);
}
cv::calcOpticalFlowFarneback(
frame0, frame1, flow, farn.pyrScale, farn.numLevels, farn.winSize,
farn.numIters, farn.polyN, farn.polySigma, farn.flags);
frame0, frame1, flow, farn->getPyrScale(), farn->getNumLevels(), farn->getWinSize(),
farn->getNumIters(), farn->getPolyN(), farn->getPolySigma(), farn->getFlags());
std::vector<cv::Mat> flowxy;
cv::split(flow, flowxy);
EXPECT_MAT_SIMILAR(flowxy[0], d_flowx, 0.1);
EXPECT_MAT_SIMILAR(flowxy[1], d_flowy, 0.1);
EXPECT_MAT_SIMILAR(flow, d_flow, 0.1);
}
INSTANTIATE_TEST_CASE_P(CUDA_OptFlow, FarnebackOpticalFlow, testing::Combine(
@ -325,15 +334,20 @@ INSTANTIATE_TEST_CASE_P(CUDA_OptFlow, FarnebackOpticalFlow, testing::Combine(
//////////////////////////////////////////////////////
// OpticalFlowDual_TVL1
PARAM_TEST_CASE(OpticalFlowDual_TVL1, cv::cuda::DeviceInfo, UseRoi)
namespace
{
IMPLEMENT_PARAM_CLASS(Gamma, double)
}
PARAM_TEST_CASE(OpticalFlowDual_TVL1, cv::cuda::DeviceInfo, Gamma)
{
cv::cuda::DeviceInfo devInfo;
bool useRoi;
double gamma;
virtual void SetUp()
{
devInfo = GET_PARAM(0);
useRoi = GET_PARAM(1);
gamma = GET_PARAM(1);
cv::cuda::setDevice(devInfo.deviceID());
}
@ -347,156 +361,28 @@ CUDA_TEST_P(OpticalFlowDual_TVL1, Accuracy)
cv::Mat frame1 = readImage("opticalflow/rubberwhale2.png", cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(frame1.empty());
cv::cuda::OpticalFlowDual_TVL1_CUDA d_alg;
cv::cuda::GpuMat d_flowx = createMat(frame0.size(), CV_32FC1, useRoi);
cv::cuda::GpuMat d_flowy = createMat(frame0.size(), CV_32FC1, useRoi);
d_alg(loadMat(frame0, useRoi), loadMat(frame1, useRoi), d_flowx, d_flowy);
cv::Ptr<cv::cuda::OpticalFlowDual_TVL1> d_alg =
cv::cuda::OpticalFlowDual_TVL1::create();
d_alg->setNumIterations(10);
d_alg->setGamma(gamma);
cv::cuda::GpuMat d_flow;
d_alg->calc(loadMat(frame0), loadMat(frame1), d_flow);
cv::Ptr<cv::DenseOpticalFlow> alg = cv::createOptFlow_DualTVL1();
alg->set("medianFiltering", 1);
alg->set("innerIterations", 1);
alg->set("outerIterations", d_alg.iterations);
alg->set("outerIterations", d_alg->getNumIterations());
alg->set("gamma", gamma);
cv::Mat flow;
alg->calc(frame0, frame1, flow);
cv::Mat gold[2];
cv::split(flow, gold);
cv::Mat mx(d_flowx);
cv::Mat my(d_flowx);
EXPECT_MAT_SIMILAR(gold[0], d_flowx, 4e-3);
EXPECT_MAT_SIMILAR(gold[1], d_flowy, 4e-3);
d_alg.gamma = 1;
alg->set("gamma", 1);
d_alg(loadMat(frame0, useRoi), loadMat(frame1, useRoi), d_flowx, d_flowy);
alg->calc(frame0, frame1, flow);
cv::split(flow, gold);
mx = cv::Mat(d_flowx);
my = cv::Mat(d_flowx);
EXPECT_MAT_SIMILAR(gold[0], d_flowx, 4e-3);
EXPECT_MAT_SIMILAR(gold[1], d_flowy, 4e-3);
EXPECT_MAT_SIMILAR(flow, d_flow, 4e-3);
}
INSTANTIATE_TEST_CASE_P(CUDA_OptFlow, OpticalFlowDual_TVL1, testing::Combine(
ALL_DEVICES,
WHOLE_SUBMAT));
//////////////////////////////////////////////////////
// FastOpticalFlowBM
namespace
{
// CPU reference for the fast block-matching optical flow test.
//
// For every pixel of I0 an exhaustive search over integer displacements
// (dx, dy) in [-search_window/2, search_window/2] is performed. The cost of a
// displacement is the sum of absolute differences between the block centred
// on the pixel in I0 and the displaced block in I1, where the block spans
// [-block_window/2, block_window/2] in both directions. Out-of-image
// coordinates are remapped with cv::borderInterpolate(..., cv::BORDER_DEFAULT).
//
// velx / vely are (re)allocated to I0.size() and receive the winning dx / dy.
// Because the comparison is strict, the first displacement reaching the
// minimum cost (dy outer, dx inner, ascending) wins ties.
// Note: I1 is indexed using I0's dimensions, so both frames are expected to
// have the same size.
void FastOpticalFlowBM_gold(const cv::Mat_<uchar>& I0, const cv::Mat_<uchar>& I1, cv::Mat_<float>& velx, cv::Mat_<float>& vely, int search_window, int block_window)
{
    velx.create(I0.size());
    vely.create(I0.size());

    const int searchR = search_window / 2;
    const int blockR = block_window / 2;

    for (int row = 0; row < I0.rows; ++row)
    {
        for (int col = 0; col < I0.cols; ++col)
        {
            int minCost = std::numeric_limits<int>::max();
            int minDx = 0;
            int minDy = 0;

            for (int sy = -searchR; sy <= searchR; ++sy)
            {
                for (int sx = -searchR; sx <= searchR; ++sx)
                {
                    // Sum of absolute differences for displacement (sx, sy).
                    int cost = 0;

                    for (int oy = -blockR; oy <= blockR; ++oy)
                    {
                        // Row indices depend only on oy, so resolve them once per block row.
                        const int r0 = cv::borderInterpolate(row + oy, I0.rows, cv::BORDER_DEFAULT);
                        const int r1 = cv::borderInterpolate(row + sy + oy, I0.rows, cv::BORDER_DEFAULT);

                        for (int ox = -blockR; ox <= blockR; ++ox)
                        {
                            const int c0 = cv::borderInterpolate(col + ox, I0.cols, cv::BORDER_DEFAULT);
                            const int c1 = cv::borderInterpolate(col + sx + ox, I0.cols, cv::BORDER_DEFAULT);

                            const int p0 = I0(r0, c0);
                            const int p1 = I1(r1, c1);

                            cost += std::abs(p0 - p1);
                        }
                    }

                    if (cost < minCost)
                    {
                        minCost = cost;
                        minDx = sx;
                        minDy = sy;
                    }
                }
            }

            velx(row, col) = static_cast<float>(minDx);
            vely(row, col) = static_cast<float>(minDy);
        }
    }
}
// Root-mean-square error between two single-channel float fields.
// Iterates over flow1's extent and divides by flow1's pixel count, so flow2
// is expected to be at least as large as flow1.
double calc_rmse(const cv::Mat_<float>& flow1, const cv::Mat_<float>& flow2)
{
    double sqErrSum = 0.0;

    for (int r = 0; r < flow1.rows; ++r)
    {
        for (int c = 0; c < flow1.cols; ++c)
        {
            const double delta = flow1(r, c) - flow2(r, c);
            sqErrSum += delta * delta;
        }
    }

    const double meanSqErr = sqErrSum / flow1.size().area();
    return std::sqrt(meanSqErr);
}
}
// Test fixture for the fast block-matching optical flow tests.
// It is parameterized by the CUDA device (cv::cuda::DeviceInfo) and holds no
// state of its own.
struct FastOpticalFlowBM : testing::TestWithParam<cv::cuda::DeviceInfo>
{
};
// Compares cv::cuda::FastOpticalFlowBM against the CPU reference
// (FastOpticalFlowBM_gold) on a 320x240 down-scaled rubberwhale image pair.
// Each flow component must agree with the reference within an RMSE of 0.6.
CUDA_TEST_P(FastOpticalFlowBM, Accuracy)
{
    const double MAX_RMSE = 0.6;
    const int searchWin = 15;
    const int blockWin = 5;

    // Run on the device this test instance was parameterized with.
    cv::cuda::setDevice(GetParam().deviceID());

    cv::Mat frame0 = readImage("opticalflow/rubberwhale1.png", cv::IMREAD_GRAYSCALE);
    ASSERT_FALSE(frame0.empty());

    cv::Mat frame1 = readImage("opticalflow/rubberwhale2.png", cv::IMREAD_GRAYSCALE);
    ASSERT_FALSE(frame1.empty());

    // Shrink the inputs before running the exhaustive CPU reference.
    const cv::Size reducedSize(320, 240);
    cv::Mat small0, small1;
    cv::resize(frame0, small0, reducedSize);
    cv::resize(frame1, small1, reducedSize);

    // Device result.
    cv::cuda::GpuMat d_flowx, d_flowy;
    cv::cuda::FastOpticalFlowBM fastBM;
    fastBM(loadMat(small0), loadMat(small1), d_flowx, d_flowy, searchWin, blockWin);

    // Host reference result.
    cv::Mat_<float> goldX, goldY;
    FastOpticalFlowBM_gold(small0, small1, goldX, goldY, searchWin, blockWin);

    double err = calc_rmse(goldX, cv::Mat(d_flowx));
    EXPECT_LE(err, MAX_RMSE);

    err = calc_rmse(goldY, cv::Mat(d_flowy));
    EXPECT_LE(err, MAX_RMSE);
}
INSTANTIATE_TEST_CASE_P(CUDA_OptFlow, FastOpticalFlowBM, ALL_DEVICES);
testing::Values(Gamma(0.0), Gamma(1.0))));
#endif // HAVE_CUDA

View File

@ -57,6 +57,7 @@
#include "opencv2/ts/cuda_test.hpp"
#include "opencv2/cudaoptflow.hpp"
#include "opencv2/cudaarithm.hpp"
#include "opencv2/video.hpp"
#include "cvconfig.h"

View File

@ -341,7 +341,7 @@ namespace
int iterations_;
bool useInitialFlow_;
Ptr<DenseOpticalFlow> alg_;
Ptr<cv::DenseOpticalFlow> alg_;
};
CV_INIT_ALGORITHM(DualTVL1, "DenseOpticalFlowExt.DualTVL1",
@ -514,7 +514,7 @@ namespace
int outerIterations_;
int solverIterations_;
BroxOpticalFlow alg_;
Ptr<cuda::BroxOpticalFlow> alg_;
};
CV_INIT_ALGORITHM(Brox_CUDA, "DenseOpticalFlowExt.Brox_CUDA",
@ -525,31 +525,40 @@ namespace
obj.info()->addParam(obj, "outerIterations", obj.outerIterations_, false, 0, 0, "Number of warping iterations (number of pyramid levels)");
obj.info()->addParam(obj, "solverIterations", obj.solverIterations_, false, 0, 0, "Number of linear system solver iterations"))
Brox_CUDA::Brox_CUDA() : GpuOpticalFlow(CV_32FC1), alg_(0.197f, 50.0f, 0.8f, 10, 77, 10)
Brox_CUDA::Brox_CUDA() : GpuOpticalFlow(CV_32FC1)
{
alpha_ = alg_.alpha;
gamma_ = alg_.gamma;
scaleFactor_ = alg_.scale_factor;
innerIterations_ = alg_.inner_iterations;
outerIterations_ = alg_.outer_iterations;
solverIterations_ = alg_.solver_iterations;
alg_ = cuda::BroxOpticalFlow::create(0.197f, 50.0f, 0.8f, 10, 77, 10);
alpha_ = alg_->getFlowSmoothness();
gamma_ = alg_->getGradientConstancyImportance();
scaleFactor_ = alg_->getPyramidScaleFactor();
innerIterations_ = alg_->getInnerIterations();
outerIterations_ = alg_->getOuterIterations();
solverIterations_ = alg_->getSolverIterations();
}
void Brox_CUDA::impl(const GpuMat& input0, const GpuMat& input1, GpuMat& dst1, GpuMat& dst2)
{
alg_.alpha = static_cast<float>(alpha_);
alg_.gamma = static_cast<float>(gamma_);
alg_.scale_factor = static_cast<float>(scaleFactor_);
alg_.inner_iterations = innerIterations_;
alg_.outer_iterations = outerIterations_;
alg_.solver_iterations = solverIterations_;
alg_->setFlowSmoothness(alpha_);
alg_->setGradientConstancyImportance(gamma_);
alg_->setPyramidScaleFactor(scaleFactor_);
alg_->setInnerIterations(innerIterations_);
alg_->setOuterIterations(outerIterations_);
alg_->setSolverIterations(solverIterations_);
alg_(input0, input1, dst1, dst2);
GpuMat flow;
alg_->calc(input0, input1, flow);
GpuMat flows[2];
cuda::split(flow, flows);
dst1 = flows[0];
dst2 = flows[1];
}
void Brox_CUDA::collectGarbage()
{
alg_.buf.release();
alg_ = cuda::BroxOpticalFlow::create(alpha_, gamma_, scaleFactor_, innerIterations_, outerIterations_, solverIterations_);
GpuOpticalFlow::collectGarbage();
}
}
@ -581,7 +590,7 @@ namespace
int maxLevel_;
int iterations_;
PyrLKOpticalFlow alg_;
Ptr<cuda::DensePyrLKOpticalFlow> alg_;
};
CV_INIT_ALGORITHM(PyrLK_CUDA, "DenseOpticalFlowExt.PyrLK_CUDA",
@ -591,24 +600,32 @@ namespace
PyrLK_CUDA::PyrLK_CUDA() : GpuOpticalFlow(CV_8UC1)
{
winSize_ = alg_.winSize.width;
maxLevel_ = alg_.maxLevel;
iterations_ = alg_.iters;
alg_ = cuda::DensePyrLKOpticalFlow::create();
winSize_ = alg_->getWinSize().width;
maxLevel_ = alg_->getMaxLevel();
iterations_ = alg_->getNumIters();
}
void PyrLK_CUDA::impl(const GpuMat& input0, const GpuMat& input1, GpuMat& dst1, GpuMat& dst2)
{
alg_.winSize.width = winSize_;
alg_.winSize.height = winSize_;
alg_.maxLevel = maxLevel_;
alg_.iters = iterations_;
alg_->setWinSize(Size(winSize_, winSize_));
alg_->setMaxLevel(maxLevel_);
alg_->setNumIters(iterations_);
alg_.dense(input0, input1, dst1, dst2);
GpuMat flow;
alg_->calc(input0, input1, flow);
GpuMat flows[2];
cuda::split(flow, flows);
dst1 = flows[0];
dst2 = flows[1];
}
void PyrLK_CUDA::collectGarbage()
{
alg_.releaseMemory();
alg_ = cuda::DensePyrLKOpticalFlow::create();
GpuOpticalFlow::collectGarbage();
}
}
@ -644,7 +661,7 @@ namespace
double polySigma_;
int flags_;
FarnebackOpticalFlow alg_;
Ptr<cuda::FarnebackOpticalFlow> alg_;
};
CV_INIT_ALGORITHM(Farneback_CUDA, "DenseOpticalFlowExt.Farneback_CUDA",
@ -658,31 +675,40 @@ namespace
Farneback_CUDA::Farneback_CUDA() : GpuOpticalFlow(CV_8UC1)
{
pyrScale_ = alg_.pyrScale;
numLevels_ = alg_.numLevels;
winSize_ = alg_.winSize;
numIters_ = alg_.numIters;
polyN_ = alg_.polyN;
polySigma_ = alg_.polySigma;
flags_ = alg_.flags;
alg_ = cuda::FarnebackOpticalFlow::create();
pyrScale_ = alg_->getPyrScale();
numLevels_ = alg_->getNumLevels();
winSize_ = alg_->getWinSize();
numIters_ = alg_->getNumIters();
polyN_ = alg_->getPolyN();
polySigma_ = alg_->getPolySigma();
flags_ = alg_->getFlags();
}
void Farneback_CUDA::impl(const GpuMat& input0, const GpuMat& input1, GpuMat& dst1, GpuMat& dst2)
{
alg_.pyrScale = pyrScale_;
alg_.numLevels = numLevels_;
alg_.winSize = winSize_;
alg_.numIters = numIters_;
alg_.polyN = polyN_;
alg_.polySigma = polySigma_;
alg_.flags = flags_;
alg_->setPyrScale(pyrScale_);
alg_->setNumLevels(numLevels_);
alg_->setWinSize(winSize_);
alg_->setNumIters(numIters_);
alg_->setPolyN(polyN_);
alg_->setPolySigma(polySigma_);
alg_->setFlags(flags_);
alg_(input0, input1, dst1, dst2);
GpuMat flow;
alg_->calc(input0, input1, flow);
GpuMat flows[2];
cuda::split(flow, flows);
dst1 = flows[0];
dst2 = flows[1];
}
void Farneback_CUDA::collectGarbage()
{
alg_.releaseMemory();
alg_ = cuda::FarnebackOpticalFlow::create();
GpuOpticalFlow::collectGarbage();
}
}
@ -719,7 +745,7 @@ namespace
int iterations_;
bool useInitialFlow_;
OpticalFlowDual_TVL1_CUDA alg_;
Ptr<cuda::OpticalFlowDual_TVL1> alg_;
};
CV_INIT_ALGORITHM(DualTVL1_CUDA, "DenseOpticalFlowExt.DualTVL1_CUDA",
@ -734,33 +760,42 @@ namespace
DualTVL1_CUDA::DualTVL1_CUDA() : GpuOpticalFlow(CV_8UC1)
{
tau_ = alg_.tau;
lambda_ = alg_.lambda;
theta_ = alg_.theta;
nscales_ = alg_.nscales;
warps_ = alg_.warps;
epsilon_ = alg_.epsilon;
iterations_ = alg_.iterations;
useInitialFlow_ = alg_.useInitialFlow;
alg_ = cuda::OpticalFlowDual_TVL1::create();
tau_ = alg_->getTau();
lambda_ = alg_->getLambda();
theta_ = alg_->getTheta();
nscales_ = alg_->getNumScales();
warps_ = alg_->getNumWarps();
epsilon_ = alg_->getEpsilon();
iterations_ = alg_->getNumIterations();
useInitialFlow_ = alg_->getUseInitialFlow();
}
void DualTVL1_CUDA::impl(const GpuMat& input0, const GpuMat& input1, GpuMat& dst1, GpuMat& dst2)
{
alg_.tau = tau_;
alg_.lambda = lambda_;
alg_.theta = theta_;
alg_.nscales = nscales_;
alg_.warps = warps_;
alg_.epsilon = epsilon_;
alg_.iterations = iterations_;
alg_.useInitialFlow = useInitialFlow_;
alg_->setTau(tau_);
alg_->setLambda(lambda_);
alg_->setTheta(theta_);
alg_->setNumScales(nscales_);
alg_->setNumWarps(warps_);
alg_->setEpsilon(epsilon_);
alg_->setNumIterations(iterations_);
alg_->setUseInitialFlow(useInitialFlow_);
alg_(input0, input1, dst1, dst2);
GpuMat flow;
alg_->calc(input0, input1, flow);
GpuMat flows[2];
cuda::split(flow, flows);
dst1 = flows[0];
dst2 = flows[1];
}
void DualTVL1_CUDA::collectGarbage()
{
alg_.collectGarbage();
alg_ = cuda::OpticalFlowDual_TVL1::create();
GpuOpticalFlow::collectGarbage();
}
}

View File

@ -121,7 +121,7 @@ public:
cuda::GpuMat &status);
private:
cuda::PyrLKOpticalFlow optFlowEstimator_;
Ptr<cuda::SparsePyrLKOpticalFlow> optFlowEstimator_;
cuda::GpuMat frame0_, frame1_, points0_, points1_, status_, errors_;
};
@ -136,7 +136,7 @@ public:
OutputArray errors);
private:
cuda::PyrLKOpticalFlow optFlowEstimator_;
Ptr<cuda::DensePyrLKOpticalFlow> optFlowEstimator_;
cuda::GpuMat frame0_, frame1_, flowX_, flowY_, errors_;
};

View File

@ -45,6 +45,10 @@
#include "opencv2/videostab/optical_flow.hpp"
#include "opencv2/videostab/ring_buffer.hpp"
#ifdef HAVE_OPENCV_CUDAARITHM
#include "opencv2/cudaarithm.hpp"
#endif
namespace cv
{
namespace videostab
@ -63,6 +67,7 @@ void SparsePyrLkOptFlowEstimator::run(
SparsePyrLkOptFlowEstimatorGpu::SparsePyrLkOptFlowEstimatorGpu()
{
CV_Assert(cuda::getCudaEnabledDeviceCount() > 0);
optFlowEstimator_ = cuda::SparsePyrLKOpticalFlow::create();
}
@ -91,9 +96,9 @@ void SparsePyrLkOptFlowEstimatorGpu::run(
const cuda::GpuMat &frame0, const cuda::GpuMat &frame1, const cuda::GpuMat &points0,
cuda::GpuMat &points1, cuda::GpuMat &status, cuda::GpuMat &errors)
{
optFlowEstimator_.winSize = winSize_;
optFlowEstimator_.maxLevel = maxLevel_;
optFlowEstimator_.sparse(frame0, frame1, points0, points1, status, &errors);
optFlowEstimator_->setWinSize(winSize_);
optFlowEstimator_->setMaxLevel(maxLevel_);
optFlowEstimator_->calc(frame0, frame1, points0, points1, status, errors);
}
@ -101,15 +106,16 @@ void SparsePyrLkOptFlowEstimatorGpu::run(
const cuda::GpuMat &frame0, const cuda::GpuMat &frame1, const cuda::GpuMat &points0,
cuda::GpuMat &points1, cuda::GpuMat &status)
{
optFlowEstimator_.winSize = winSize_;
optFlowEstimator_.maxLevel = maxLevel_;
optFlowEstimator_.sparse(frame0, frame1, points0, points1, status);
optFlowEstimator_->setWinSize(winSize_);
optFlowEstimator_->setMaxLevel(maxLevel_);
optFlowEstimator_->calc(frame0, frame1, points0, points1, status);
}
DensePyrLkOptFlowEstimatorGpu::DensePyrLkOptFlowEstimatorGpu()
{
CV_Assert(cuda::getCudaEnabledDeviceCount() > 0);
optFlowEstimator_ = cuda::DensePyrLKOpticalFlow::create();
}
@ -120,16 +126,24 @@ void DensePyrLkOptFlowEstimatorGpu::run(
frame0_.upload(frame0.getMat());
frame1_.upload(frame1.getMat());
optFlowEstimator_.winSize = winSize_;
optFlowEstimator_.maxLevel = maxLevel_;
optFlowEstimator_->setWinSize(winSize_);
optFlowEstimator_->setMaxLevel(maxLevel_);
if (errors.needed())
{
optFlowEstimator_.dense(frame0_, frame1_, flowX_, flowY_, &errors_);
errors_.download(errors.getMatRef());
CV_Error(Error::StsNotImplemented, "DensePyrLkOptFlowEstimatorGpu doesn't support errors calculation");
}
else
optFlowEstimator_.dense(frame0_, frame1_, flowX_, flowY_);
{
cuda::GpuMat flow;
optFlowEstimator_->calc(frame0_, frame1_, flow);
cuda::GpuMat flows[2];
cuda::split(flow, flows);
flowX_ = flows[0];
flowY_ = flows[1];
}
flowX_.download(flowX.getMatRef());
flowY_.download(flowY.getMatRef());

View File

@ -1,270 +0,0 @@
#include <iostream>
#include <iomanip>
#include <string>
#include <ctype.h>
#include "opencv2/core.hpp"
#include "opencv2/core/utility.hpp"
#include "opencv2/highgui.hpp"
#include "opencv2/imgproc.hpp"
#include "opencv2/cudaoptflow.hpp"
#include "opencv2/cudaarithm.hpp"
using namespace std;
using namespace cv;
using namespace cv::cuda;
void getFlowField(const Mat& u, const Mat& v, Mat& flowField);
// Brox optical flow demo: frame interpolation.
//
// Loads two images, estimates forward and backward optical flow on their
// grayscale versions with BroxOpticalFlow, then synthesizes intermediate
// colour frames with interpolateFrames at multiples of the requested time
// step. The flow fields and the frame sequence are shown in HighGUI windows;
// 'a' / 's' step backwards / forwards through the frames and ESC quits.
//
// Returns 0 on normal exit (including the help path) and -1 on invalid
// arguments, unreadable images or any exception.
int main(int argc, const char* argv[])
{
    try
    {
        const char* keys =
            "{ h help | | print help message }"
            "{ l left | | specify left image }"
            "{ r right | | specify right image }"
            "{ s scale | 0.8 | set pyramid scale factor }"
            "{ a alpha | 0.197 | set alpha }"
            "{ g gamma | 50.0 | set gamma }"
            "{ i inner | 10 | set number of inner iterations }"
            "{ o outer | 77 | set number of outer iterations }"
            "{ si solver | 10 | set number of basic solver iterations }"
            "{ t time_step | 0.1 | set frame interpolation time step }";

        CommandLineParser cmd(argc, argv, keys);

        if (cmd.has("help") || !cmd.check())
        {
            cmd.printMessage();
            cmd.printErrors();
            return 0;
        }

        string frame0Name = cmd.get<string>("left");
        string frame1Name = cmd.get<string>("right");
        float scale = cmd.get<float>("scale");
        float alpha = cmd.get<float>("alpha");
        float gamma = cmd.get<float>("gamma");
        int inner_iterations = cmd.get<int>("inner");
        int outer_iterations = cmd.get<int>("outer");
        int solver_iterations = cmd.get<int>("solver");
        float timeStep = cmd.get<float>("time_step");

        if (frame0Name.empty() || frame1Name.empty())
        {
            cerr << "Missing input file names" << endl;
            return -1;
        }

        // The interpolation loop below advances by timeStep until it reaches 1.0,
        // and the frame count is derived from 1.0f / timeStep. A zero, negative or
        // NaN step would make that division meaningless and, for zero or negative
        // steps, the loop endless. Reject it up front. Written as !(x > 0) so
        // that NaN is rejected as well.
        if (!(timeStep > 0.0f))
        {
            cerr << "Time step must be a positive number" << endl;
            return -1;
        }

        Mat frame0Color = imread(frame0Name);
        Mat frame1Color = imread(frame1Name);

        if (frame0Color.empty() || frame1Color.empty())
        {
            cout << "Can't load input images" << endl;
            return -1;
        }

        cv::cuda::printShortCudaDeviceInfo(cv::cuda::getDevice());

        cout << "OpenCV / NVIDIA Computer Vision" << endl;
        cout << "Optical Flow Demo: Frame Interpolation" << endl;
        cout << "=========================================" << endl;

        namedWindow("Forward flow");
        namedWindow("Backward flow");
        namedWindow("Interpolated frame");

        cout << "Press:" << endl;
        cout << "\tESC to quit" << endl;
        cout << "\t'a' to move to the previous frame" << endl;
        cout << "\t's' to move to the next frame\n" << endl;

        // Work in floating point, scaling pixel values by 1/255.
        frame0Color.convertTo(frame0Color, CV_32F, 1.0 / 255.0);
        frame1Color.convertTo(frame1Color, CV_32F, 1.0 / 255.0);

        // Flow is estimated on the grayscale versions of the frames.
        Mat frame0Gray, frame1Gray;
        cv::cvtColor(frame0Color, frame0Gray, COLOR_BGR2GRAY);
        cv::cvtColor(frame1Color, frame1Gray, COLOR_BGR2GRAY);

        GpuMat d_frame0(frame0Gray);
        GpuMat d_frame1(frame1Gray);

        cout << "Estimating optical flow" << endl;

        BroxOpticalFlow d_flow(alpha, gamma, scale, inner_iterations, outer_iterations, solver_iterations);

        cout << "\tForward..." << endl;

        GpuMat d_fu, d_fv;
        d_flow(d_frame0, d_frame1, d_fu, d_fv);

        Mat flowFieldForward;
        getFlowField(Mat(d_fu), Mat(d_fv), flowFieldForward);

        cout << "\tBackward..." << endl;

        GpuMat d_bu, d_bv;
        d_flow(d_frame1, d_frame0, d_bu, d_bv);

        Mat flowFieldBackward;
        getFlowField(Mat(d_bu), Mat(d_bv), flowFieldBackward);

        cout << "Interpolating..." << endl;

        // first frame color components
        GpuMat d_b, d_g, d_r;

        // second frame color components
        GpuMat d_bt, d_gt, d_rt;

        // prepare color components on host and copy them to device memory
        Mat channels[3];
        cv::split(frame0Color, channels);
        d_b.upload(channels[0]);
        d_g.upload(channels[1]);
        d_r.upload(channels[2]);

        cv::split(frame1Color, channels);
        d_bt.upload(channels[0]);
        d_gt.upload(channels[1]);
        d_rt.upload(channels[2]);

        // temporary buffer
        GpuMat d_buf;

        // intermediate frame color components (GPU memory)
        GpuMat d_rNew, d_gNew, d_bNew;
        GpuMat d_newFrame;

        // Sequence shown to the user: first frame, interpolated frames, second frame.
        vector<Mat> frames;
        frames.reserve(static_cast<int>(1.0f / timeStep) + 2);
        frames.push_back(frame0Color);

        // compute interpolated frames
        for (float timePos = timeStep; timePos < 1.0f; timePos += timeStep)
        {
            // interpolate blue channel
            interpolateFrames(d_b, d_bt, d_fu, d_fv, d_bu, d_bv, timePos, d_bNew, d_buf);

            // interpolate green channel
            interpolateFrames(d_g, d_gt, d_fu, d_fv, d_bu, d_bv, timePos, d_gNew, d_buf);

            // interpolate red channel
            interpolateFrames(d_r, d_rt, d_fu, d_fv, d_bu, d_bv, timePos, d_rNew, d_buf);

            GpuMat channels3[] = {d_bNew, d_gNew, d_rNew};
            cuda::merge(channels3, 3, d_newFrame);

            frames.push_back(Mat(d_newFrame));

            cout << setprecision(4) << timePos * 100.0f << "%\r";
        }

        frames.push_back(frame1Color);

        cout << setw(5) << "100%" << endl;
        cout << "Done" << endl;

        imshow("Forward flow", flowFieldForward);
        imshow("Backward flow", flowFieldBackward);

        int currentFrame = 0;
        imshow("Interpolated frame", frames[currentFrame]);

        // Interactive viewer: poll the keyboard until ESC is pressed.
        for(;;)
        {
            int key = toupper(waitKey(10) & 0xff);

            switch (key)
            {
            case 27:
                return 0;

            case 'A':
                if (currentFrame > 0)
                    --currentFrame;
                imshow("Interpolated frame", frames[currentFrame]);
                break;

            case 'S':
                if (currentFrame < static_cast<int>(frames.size()) - 1)
                    ++currentFrame;
                imshow("Interpolated frame", frames[currentFrame]);
                break;
            }
        }
    }
    catch (const exception& ex)
    {
        cerr << ex.what() << endl;
        return -1;
    }
    catch (...)
    {
        cerr << "Unknown error" << endl;
        return -1;
    }
}
// Limits x to the range [a, b]: yields a when x is not greater than a,
// b when x is not less than b, and x otherwise.
template <typename T> inline T clamp (T x, T a, T b)
{
    if (x > a)
        return x < b ? x : b;

    return a;
}
// Linearly maps x from the source range [a, b] onto the target range [c, d].
// x is clamped to [a, b] first, so the result never leaves [c, d].
// The caller must pass a != b, since the mapping divides by (b - a).
template <typename T> inline T mapValue(T x, T a, T b, T c, T d)
{
    const T bounded = clamp(x, a, b);
    return c + (d - c) * (bounded - a) / (b - a);
}
void getFlowField(const Mat& u, const Mat& v, Mat& flowField)
{
float maxDisplacement = 1.0f;
for (int i = 0; i < u.rows; ++i)
{
const float* ptr_u = u.ptr<float>(i);
const float* ptr_v = v.ptr<float>(i);
for (int j = 0; j < u.cols; ++j)
{
float d = max(fabsf(ptr_u[j]), fabsf(ptr_v[j]));
if (d > maxDisplacement)
maxDisplacement = d;
}
}
flowField.create(u.size(), CV_8UC4);
for (int i = 0; i < flowField.rows; ++i)
{
const float* ptr_u = u.ptr<float>(i);
const float* ptr_v = v.ptr<float>(i);
Vec4b* row = flowField.ptr<Vec4b>(i);
for (int j = 0; j < flowField.cols; ++j)
{
row[j][0] = 0;
row[j][1] = static_cast<unsigned char> (mapValue (-ptr_v[j], -maxDisplacement, maxDisplacement, 0.0f, 255.0f));
row[j][2] = static_cast<unsigned char> (mapValue ( ptr_u[j], -maxDisplacement, maxDisplacement, 0.0f, 255.0f));
row[j][3] = 255;
}
}
}

View File

@ -7,6 +7,7 @@
#include "opencv2/highgui.hpp"
#include "opencv2/video.hpp"
#include "opencv2/cudaoptflow.hpp"
#include "opencv2/cudaarithm.hpp"
using namespace std;
using namespace cv;
@ -70,8 +71,8 @@ int main(int argc, char **argv)
if (frameL.empty() || frameR.empty()) return -1;
GpuMat d_frameL(frameL), d_frameR(frameR);
GpuMat d_flowx, d_flowy;
FarnebackOpticalFlow d_calc;
GpuMat d_flow;
Ptr<cuda::FarnebackOpticalFlow> d_calc = cuda::FarnebackOpticalFlow::create();
Mat flowxy, flowx, flowy, image;
bool running = true, gpuMode = true;
@ -86,17 +87,21 @@ int main(int argc, char **argv)
if (gpuMode)
{
tc0 = getTickCount();
d_calc(d_frameL, d_frameR, d_flowx, d_flowy);
d_calc->calc(d_frameL, d_frameR, d_flow);
tc1 = getTickCount();
d_flowx.download(flowx);
d_flowy.download(flowy);
GpuMat planes[2];
cuda::split(d_flow, planes);
planes[0].download(flowx);
planes[1].download(flowy);
}
else
{
tc0 = getTickCount();
calcOpticalFlowFarneback(
frameL, frameR, flowxy, d_calc.pyrScale, d_calc.numLevels, d_calc.winSize,
d_calc.numIters, d_calc.polyN, d_calc.polySigma, d_calc.flags);
frameL, frameR, flowxy, d_calc->getPyrScale(), d_calc->getNumLevels(), d_calc->getWinSize(),
d_calc->getNumIters(), d_calc->getPolyN(), d_calc->getPolySigma(), d_calc->getFlags());
tc1 = getTickCount();
Mat planes[] = {flowx, flowy};

View File

@ -5,6 +5,7 @@
#include <opencv2/core/utility.hpp>
#include "opencv2/highgui.hpp"
#include "opencv2/cudaoptflow.hpp"
#include "opencv2/cudaarithm.hpp"
using namespace std;
using namespace cv;
@ -122,10 +123,13 @@ static void drawOpticalFlow(const Mat_<float>& flowx, const Mat_<float>& flowy,
}
}
static void showFlow(const char* name, const GpuMat& d_flowx, const GpuMat& d_flowy)
static void showFlow(const char* name, const GpuMat& d_flow)
{
Mat flowx(d_flowx);
Mat flowy(d_flowy);
GpuMat planes[2];
cuda::split(d_flow, planes);
Mat flowx(planes[0]);
Mat flowy(planes[1]);
Mat out;
drawOpticalFlow(flowx, flowy, out, 10);
@ -171,14 +175,12 @@ int main(int argc, const char* argv[])
GpuMat d_frame0(frame0);
GpuMat d_frame1(frame1);
GpuMat d_flowx(frame0.size(), CV_32FC1);
GpuMat d_flowy(frame0.size(), CV_32FC1);
GpuMat d_flow(frame0.size(), CV_32FC2);
BroxOpticalFlow brox(0.197f, 50.0f, 0.8f, 10, 77, 10);
PyrLKOpticalFlow lk; lk.winSize = Size(7, 7);
FarnebackOpticalFlow farn;
OpticalFlowDual_TVL1_CUDA tvl1;
FastOpticalFlowBM fastBM;
Ptr<cuda::BroxOpticalFlow> brox = cuda::BroxOpticalFlow::create(0.197f, 50.0f, 0.8f, 10, 77, 10);
Ptr<cuda::DensePyrLKOpticalFlow> lk = cuda::DensePyrLKOpticalFlow::create(Size(7, 7));
Ptr<cuda::FarnebackOpticalFlow> farn = cuda::FarnebackOpticalFlow::create();
Ptr<cuda::OpticalFlowDual_TVL1> tvl1 = cuda::OpticalFlowDual_TVL1::create();
{
GpuMat d_frame0f;
@ -189,68 +191,45 @@ int main(int argc, const char* argv[])
const int64 start = getTickCount();
brox(d_frame0f, d_frame1f, d_flowx, d_flowy);
brox->calc(d_frame0f, d_frame1f, d_flow);
const double timeSec = (getTickCount() - start) / getTickFrequency();
cout << "Brox : " << timeSec << " sec" << endl;
showFlow("Brox", d_flowx, d_flowy);
showFlow("Brox", d_flow);
}
{
const int64 start = getTickCount();
lk.dense(d_frame0, d_frame1, d_flowx, d_flowy);
lk->calc(d_frame0, d_frame1, d_flow);
const double timeSec = (getTickCount() - start) / getTickFrequency();
cout << "LK : " << timeSec << " sec" << endl;
showFlow("LK", d_flowx, d_flowy);
showFlow("LK", d_flow);
}
{
const int64 start = getTickCount();
farn(d_frame0, d_frame1, d_flowx, d_flowy);
farn->calc(d_frame0, d_frame1, d_flow);
const double timeSec = (getTickCount() - start) / getTickFrequency();
cout << "Farn : " << timeSec << " sec" << endl;
showFlow("Farn", d_flowx, d_flowy);
showFlow("Farn", d_flow);
}
{
const int64 start = getTickCount();
tvl1(d_frame0, d_frame1, d_flowx, d_flowy);
tvl1->calc(d_frame0, d_frame1, d_flow);
const double timeSec = (getTickCount() - start) / getTickFrequency();
cout << "TVL1 : " << timeSec << " sec" << endl;
showFlow("TVL1", d_flowx, d_flowy);
}
{
const int64 start = getTickCount();
GpuMat buf;
calcOpticalFlowBM(d_frame0, d_frame1, Size(7, 7), Size(1, 1), Size(21, 21), false, d_flowx, d_flowy, buf);
const double timeSec = (getTickCount() - start) / getTickFrequency();
cout << "BM : " << timeSec << " sec" << endl;
showFlow("BM", d_flowx, d_flowy);
}
{
const int64 start = getTickCount();
fastBM(d_frame0, d_frame1, d_flowx, d_flowy);
const double timeSec = (getTickCount() - start) / getTickFrequency();
cout << "Fast BM : " << timeSec << " sec" << endl;
showFlow("Fast BM", d_flowx, d_flowy);
showFlow("TVL1", d_flow);
}
imshow("Frame 0", frame0);

View File

@ -1187,87 +1187,6 @@ TEST(GoodFeaturesToTrack)
CUDA_OFF;
}
// Performance comparison of sparse pyramidal Lucas-Kanade optical flow:
// CPU calcOpticalFlowPyrLK versus cuda::PyrLKOpticalFlow::sparse, for
// 1000, 2000, 4000 and 8000 requested feature points on the rubberwhale pair.
// Each implementation is called once untimed before the timed call.
TEST(PyrLKOpticalFlow)
{
    Mat frame0 = imread(abspath("../data/rubberwhale1.png"));
    if (frame0.empty()) throw runtime_error("can't open ../data/rubberwhale1.png");

    Mat frame1 = imread(abspath("../data/rubberwhale2.png"));
    if (frame1.empty()) throw runtime_error("can't open ../data/rubberwhale2.png");

    // Feature detection needs a single-channel image.
    Mat gray_frame;
    cvtColor(frame0, gray_frame, COLOR_BGR2GRAY);

    for (int numPoints = 1000; numPoints <= 8000; numPoints *= 2)
    {
        SUBTEST << numPoints;

        vector<Point2f> prevPts;
        goodFeaturesToTrack(gray_frame, prevPts, numPoints, 0.01, 0.0);

        // --- CPU ---
        vector<Point2f> trackedPts;
        vector<unsigned char> found;
        vector<float> trackErr;

        // Untimed first call.
        calcOpticalFlowPyrLK(frame0, frame1, prevPts, trackedPts, found, trackErr);

        CPU_ON;
        calcOpticalFlowPyrLK(frame0, frame1, prevPts, trackedPts, found, trackErr);
        CPU_OFF;

        // --- CUDA ---
        cuda::PyrLKOpticalFlow d_pyrLK;

        cuda::GpuMat d_frame0(frame0);
        cuda::GpuMat d_frame1(frame1);

        // Wrap the point vector as a 1 x N two-channel float row (no copy) and upload it.
        Mat prevPtsRow(1, (int)prevPts.size(), CV_32FC2, (void*)&prevPts[0]);
        cuda::GpuMat d_pts;
        d_pts.upload(prevPtsRow);

        cuda::GpuMat d_nextPts;
        cuda::GpuMat d_status;
        cuda::GpuMat d_err;

        // Untimed first call.
        d_pyrLK.sparse(d_frame0, d_frame1, d_pts, d_nextPts, d_status, &d_err);

        CUDA_ON;
        d_pyrLK.sparse(d_frame0, d_frame1, d_pts, d_nextPts, d_status, &d_err);
        CUDA_OFF;
    }
}
// Performance comparison of Farneback dense optical flow:
// cuda::FarnebackOpticalFlow versus CPU calcOpticalFlowFarneback, for every
// combination of dataset, fastPyramids on/off and Gaussian blur flag on/off.
// The CPU call reuses the parameter values held by the CUDA object.
TEST(FarnebackOpticalFlow)
{
    const string datasets[] = {"../data/rubberwhale", "../data/basketball"};
    const size_t datasetCount = sizeof(datasets)/sizeof(*datasets);

    for (size_t idx = 0; idx < datasetCount; ++idx)
    {
        for (int fastPyramids = 0; fastPyramids < 2; ++fastPyramids)
        {
            for (int useGaussianBlur = 0; useGaussianBlur < 2; ++useGaussianBlur)
            {
                SUBTEST << "dataset=" << datasets[idx] << ", fastPyramids=" << fastPyramids << ", useGaussianBlur=" << useGaussianBlur;

                Mat frame0 = imread(abspath(datasets[idx] + "1.png"), IMREAD_GRAYSCALE);
                Mat frame1 = imread(abspath(datasets[idx] + "2.png"), IMREAD_GRAYSCALE);
                if (frame0.empty()) throw runtime_error("can't open " + datasets[idx] + "1.png");
                if (frame1.empty()) throw runtime_error("can't open " + datasets[idx] + "2.png");

                cuda::FarnebackOpticalFlow calc;
                calc.fastPyramids = fastPyramids != 0;
                if (useGaussianBlur)
                    calc.flags |= OPTFLOW_FARNEBACK_GAUSSIAN;

                cuda::GpuMat d_frame0(frame0);
                cuda::GpuMat d_frame1(frame1);
                cuda::GpuMat d_flowx;
                cuda::GpuMat d_flowy;

                CUDA_ON;
                calc(d_frame0, d_frame1, d_flowx, d_flowy);
                CUDA_OFF;

                Mat flow;

                CPU_ON;
                calcOpticalFlowFarneback(frame0, frame1, flow, calc.pyrScale, calc.numLevels, calc.winSize, calc.numIters, calc.polyN, calc.polySigma, calc.flags);
                CPU_OFF;
            }
        }
    }
}
#ifdef HAVE_OPENCV_BGSEGM
TEST(MOG)

View File

@ -77,44 +77,6 @@ template <typename T> inline T mapValue(T x, T a, T b, T c, T d)
return c + (d - c) * (x - a) / (b - a);
}
static void getFlowField(const Mat& u, const Mat& v, Mat& flowField)
{
float maxDisplacement = 1.0f;
for (int i = 0; i < u.rows; ++i)
{
const float* ptr_u = u.ptr<float>(i);
const float* ptr_v = v.ptr<float>(i);
for (int j = 0; j < u.cols; ++j)
{
float d = max(fabsf(ptr_u[j]), fabsf(ptr_v[j]));
if (d > maxDisplacement)
maxDisplacement = d;
}
}
flowField.create(u.size(), CV_8UC4);
for (int i = 0; i < flowField.rows; ++i)
{
const float* ptr_u = u.ptr<float>(i);
const float* ptr_v = v.ptr<float>(i);
Vec4b* row = flowField.ptr<Vec4b>(i);
for (int j = 0; j < flowField.cols; ++j)
{
row[j][0] = 0;
row[j][1] = static_cast<unsigned char> (mapValue (-ptr_v[j], -maxDisplacement, maxDisplacement, 0.0f, 255.0f));
row[j][2] = static_cast<unsigned char> (mapValue ( ptr_u[j], -maxDisplacement, maxDisplacement, 0.0f, 255.0f));
row[j][3] = 255;
}
}
}
int main(int argc, const char* argv[])
{
const char* keys =
@ -186,12 +148,8 @@ int main(int argc, const char* argv[])
// Sparse
PyrLKOpticalFlow d_pyrLK;
d_pyrLK.winSize.width = winSize;
d_pyrLK.winSize.height = winSize;
d_pyrLK.maxLevel = maxLevel;
d_pyrLK.iters = iters;
Ptr<cuda::SparsePyrLKOpticalFlow> d_pyrLK = cuda::SparsePyrLKOpticalFlow::create(
Size(winSize, winSize), maxLevel, iters);
GpuMat d_frame0(frame0);
GpuMat d_frame1(frame1);
@ -199,7 +157,7 @@ int main(int argc, const char* argv[])
GpuMat d_nextPts;
GpuMat d_status;
d_pyrLK.sparse(useGray ? d_frame0Gray : d_frame0, useGray ? d_frame1Gray : d_frame1, d_prevPts, d_nextPts, d_status);
d_pyrLK->calc(useGray ? d_frame0Gray : d_frame0, useGray ? d_frame1Gray : d_frame1, d_prevPts, d_nextPts, d_status);
// Draw arrows
@ -216,20 +174,6 @@ int main(int argc, const char* argv[])
imshow("PyrLK [Sparse]", frame0);
// Dense
GpuMat d_u;
GpuMat d_v;
d_pyrLK.dense(d_frame0Gray, d_frame1Gray, d_u, d_v);
// Draw flow field
Mat flowField;
getFlowField(Mat(d_u), Mat(d_v), flowField);
imshow("PyrLK [Dense] Flow Field", flowField);
waitKey();
return 0;