mirror of
https://github.com/opencv/opencv.git
synced 2024-11-24 19:20:28 +08:00
switched to Input/Output Array in reductions operations
This commit is contained in:
parent
c52d56964c
commit
8fcef225fb
@ -458,7 +458,7 @@ public:
|
||||
|
||||
// generate integral for scale
|
||||
gpu::resize(image, src, level.sFrame, 0, 0, cv::INTER_LINEAR);
|
||||
gpu::integralBuffered(src, sint, buff);
|
||||
gpu::integral(src, sint, buff);
|
||||
|
||||
// calculate job
|
||||
int totalWidth = level.workArea.width / step;
|
||||
|
@ -209,85 +209,150 @@ inline void LUT(InputArray src, InputArray lut, OutputArray dst, Stream& stream)
|
||||
CV_EXPORTS void copyMakeBorder(InputArray src, OutputArray dst, int top, int bottom, int left, int right, int borderType,
|
||||
Scalar value = Scalar(), Stream& stream = Stream::Null());
|
||||
|
||||
//! implements generalized matrix product algorithm GEMM from BLAS
|
||||
CV_EXPORTS void gemm(const GpuMat& src1, const GpuMat& src2, double alpha,
|
||||
const GpuMat& src3, double beta, GpuMat& dst, int flags = 0, Stream& stream = Stream::Null());
|
||||
|
||||
//! scales and shifts array elements so that either the specified norm (alpha) or the minimum (alpha) and maximum (beta) array values get the specified values
|
||||
CV_EXPORTS void normalize(const GpuMat& src, GpuMat& dst, double alpha = 1, double beta = 0,
|
||||
int norm_type = NORM_L2, int dtype = -1, const GpuMat& mask = GpuMat());
|
||||
CV_EXPORTS void normalize(const GpuMat& src, GpuMat& dst, double a, double b,
|
||||
int norm_type, int dtype, const GpuMat& mask, GpuMat& norm_buf, GpuMat& cvt_buf);
|
||||
|
||||
//! computes norm of array
|
||||
//! supports NORM_INF, NORM_L1, NORM_L2
|
||||
//! supports all matrices except 64F
|
||||
CV_EXPORTS double norm(const GpuMat& src1, int normType=NORM_L2);
|
||||
CV_EXPORTS double norm(const GpuMat& src1, int normType, GpuMat& buf);
|
||||
CV_EXPORTS double norm(const GpuMat& src1, int normType, const GpuMat& mask, GpuMat& buf);
|
||||
CV_EXPORTS double norm(InputArray src1, int normType, InputArray mask, GpuMat& buf);
|
||||
static inline double norm(InputArray src, int normType)
|
||||
{
|
||||
GpuMat buf;
|
||||
return norm(src, normType, GpuMat(), buf);
|
||||
}
|
||||
static inline double norm(InputArray src, int normType, GpuMat& buf)
|
||||
{
|
||||
return norm(src, normType, GpuMat(), buf);
|
||||
}
|
||||
|
||||
//! computes norm of the difference between two arrays
|
||||
//! supports NORM_INF, NORM_L1, NORM_L2
|
||||
//! supports only CV_8UC1 type
|
||||
CV_EXPORTS double norm(const GpuMat& src1, const GpuMat& src2, int normType=NORM_L2);
|
||||
CV_EXPORTS double norm(InputArray src1, InputArray src2, GpuMat& buf, int normType=NORM_L2);
|
||||
static inline double norm(InputArray src1, InputArray src2, int normType=NORM_L2)
|
||||
{
|
||||
GpuMat buf;
|
||||
return norm(src1, src2, buf, normType);
|
||||
}
|
||||
|
||||
//! computes sum of array elements
|
||||
//! supports only single channel images
|
||||
CV_EXPORTS Scalar sum(const GpuMat& src);
|
||||
CV_EXPORTS Scalar sum(const GpuMat& src, GpuMat& buf);
|
||||
CV_EXPORTS Scalar sum(const GpuMat& src, const GpuMat& mask, GpuMat& buf);
|
||||
CV_EXPORTS Scalar sum(InputArray src, InputArray mask, GpuMat& buf);
|
||||
static inline Scalar sum(InputArray src)
|
||||
{
|
||||
GpuMat buf;
|
||||
return sum(src, GpuMat(), buf);
|
||||
}
|
||||
static inline Scalar sum(InputArray src, GpuMat& buf)
|
||||
{
|
||||
return sum(src, GpuMat(), buf);
|
||||
}
|
||||
|
||||
//! computes sum of array elements absolute values
|
||||
//! supports only single channel images
|
||||
CV_EXPORTS Scalar absSum(const GpuMat& src);
|
||||
CV_EXPORTS Scalar absSum(const GpuMat& src, GpuMat& buf);
|
||||
CV_EXPORTS Scalar absSum(const GpuMat& src, const GpuMat& mask, GpuMat& buf);
|
||||
CV_EXPORTS Scalar absSum(InputArray src, InputArray mask, GpuMat& buf);
|
||||
static inline Scalar absSum(InputArray src)
|
||||
{
|
||||
GpuMat buf;
|
||||
return absSum(src, GpuMat(), buf);
|
||||
}
|
||||
static inline Scalar absSum(InputArray src, GpuMat& buf)
|
||||
{
|
||||
return absSum(src, GpuMat(), buf);
|
||||
}
|
||||
|
||||
//! computes squared sum of array elements
|
||||
//! supports only single channel images
|
||||
CV_EXPORTS Scalar sqrSum(const GpuMat& src);
|
||||
CV_EXPORTS Scalar sqrSum(const GpuMat& src, GpuMat& buf);
|
||||
CV_EXPORTS Scalar sqrSum(const GpuMat& src, const GpuMat& mask, GpuMat& buf);
|
||||
CV_EXPORTS Scalar sqrSum(InputArray src, InputArray mask, GpuMat& buf);
|
||||
static inline Scalar sqrSum(InputArray src)
|
||||
{
|
||||
GpuMat buf;
|
||||
return sqrSum(src, GpuMat(), buf);
|
||||
}
|
||||
static inline Scalar sqrSum(InputArray src, GpuMat& buf)
|
||||
{
|
||||
return sqrSum(src, GpuMat(), buf);
|
||||
}
|
||||
|
||||
//! finds global minimum and maximum array elements and returns their values
|
||||
CV_EXPORTS void minMax(const GpuMat& src, double* minVal, double* maxVal=0, const GpuMat& mask=GpuMat());
|
||||
CV_EXPORTS void minMax(const GpuMat& src, double* minVal, double* maxVal, const GpuMat& mask, GpuMat& buf);
|
||||
CV_EXPORTS void minMax(InputArray src, double* minVal, double* maxVal, InputArray mask, GpuMat& buf);
|
||||
static inline void minMax(InputArray src, double* minVal, double* maxVal=0, InputArray mask=noArray())
|
||||
{
|
||||
GpuMat buf;
|
||||
minMax(src, minVal, maxVal, mask, buf);
|
||||
}
|
||||
|
||||
//! finds global minimum and maximum array elements and returns their values with locations
|
||||
CV_EXPORTS void minMaxLoc(const GpuMat& src, double* minVal, double* maxVal=0, Point* minLoc=0, Point* maxLoc=0,
|
||||
const GpuMat& mask=GpuMat());
|
||||
CV_EXPORTS void minMaxLoc(const GpuMat& src, double* minVal, double* maxVal, Point* minLoc, Point* maxLoc,
|
||||
const GpuMat& mask, GpuMat& valbuf, GpuMat& locbuf);
|
||||
CV_EXPORTS void minMaxLoc(InputArray src, double* minVal, double* maxVal, Point* minLoc, Point* maxLoc,
|
||||
InputArray mask, GpuMat& valbuf, GpuMat& locbuf);
|
||||
static inline void minMaxLoc(InputArray src, double* minVal, double* maxVal=0, Point* minLoc=0, Point* maxLoc=0,
|
||||
InputArray mask=noArray())
|
||||
{
|
||||
GpuMat valBuf, locBuf;
|
||||
minMaxLoc(src, minVal, maxVal, minLoc, maxLoc, mask, valBuf, locBuf);
|
||||
}
|
||||
|
||||
//! counts non-zero array elements
|
||||
CV_EXPORTS int countNonZero(const GpuMat& src);
|
||||
CV_EXPORTS int countNonZero(const GpuMat& src, GpuMat& buf);
|
||||
CV_EXPORTS int countNonZero(InputArray src, GpuMat& buf);
|
||||
static inline int countNonZero(const GpuMat& src)
|
||||
{
|
||||
GpuMat buf;
|
||||
return countNonZero(src, buf);
|
||||
}
|
||||
|
||||
//! reduces a matrix to a vector
|
||||
CV_EXPORTS void reduce(const GpuMat& mtx, GpuMat& vec, int dim, int reduceOp, int dtype = -1, Stream& stream = Stream::Null());
|
||||
CV_EXPORTS void reduce(InputArray mtx, OutputArray vec, int dim, int reduceOp, int dtype = -1, Stream& stream = Stream::Null());
|
||||
|
||||
//! computes mean value and standard deviation of all or selected array elements
|
||||
//! supports only CV_8UC1 type
|
||||
CV_EXPORTS void meanStdDev(const GpuMat& mtx, Scalar& mean, Scalar& stddev);
|
||||
//! buffered version
|
||||
CV_EXPORTS void meanStdDev(const GpuMat& mtx, Scalar& mean, Scalar& stddev, GpuMat& buf);
|
||||
CV_EXPORTS void meanStdDev(InputArray mtx, Scalar& mean, Scalar& stddev, GpuMat& buf);
|
||||
static inline void meanStdDev(InputArray src, Scalar& mean, Scalar& stddev)
|
||||
{
|
||||
GpuMat buf;
|
||||
meanStdDev(src, mean, stddev, buf);
|
||||
}
|
||||
|
||||
//! computes the standard deviation of integral images
|
||||
//! supports only CV_32SC1 source type and CV_32FC1 sqr type
|
||||
//! output will have CV_32FC1 type
|
||||
CV_EXPORTS void rectStdDev(const GpuMat& src, const GpuMat& sqr, GpuMat& dst, const Rect& rect, Stream& stream = Stream::Null());
|
||||
CV_EXPORTS void rectStdDev(InputArray src, InputArray sqr, OutputArray dst, Rect rect, Stream& stream = Stream::Null());
|
||||
|
||||
//! scales and shifts array elements so that either the specified norm (alpha) or the minimum (alpha) and maximum (beta) array values get the specified values
|
||||
CV_EXPORTS void normalize(InputArray src, OutputArray dst, double alpha, double beta,
|
||||
int norm_type, int dtype, InputArray mask, GpuMat& norm_buf, GpuMat& cvt_buf);
|
||||
static inline void normalize(InputArray src, OutputArray dst, double alpha = 1, double beta = 0,
|
||||
int norm_type = NORM_L2, int dtype = -1, InputArray mask = noArray())
|
||||
{
|
||||
GpuMat norm_buf;
|
||||
GpuMat cvt_buf;
|
||||
normalize(src, dst, alpha, beta, norm_type, dtype, mask, norm_buf, cvt_buf);
|
||||
}
|
||||
|
||||
//! computes the integral image
|
||||
//! sum will have CV_32S type, but will contain unsigned int values
|
||||
//! supports only CV_8UC1 source type
|
||||
CV_EXPORTS void integral(const GpuMat& src, GpuMat& sum, Stream& stream = Stream::Null());
|
||||
//! buffered version
|
||||
CV_EXPORTS void integralBuffered(const GpuMat& src, GpuMat& sum, GpuMat& buffer, Stream& stream = Stream::Null());
|
||||
CV_EXPORTS void integral(InputArray src, OutputArray sum, GpuMat& buffer, Stream& stream = Stream::Null());
|
||||
static inline void integralBuffered(InputArray src, OutputArray sum, GpuMat& buffer, Stream& stream = Stream::Null())
|
||||
{
|
||||
integral(src, sum, buffer, stream);
|
||||
}
|
||||
static inline void integral(InputArray src, OutputArray sum, Stream& stream = Stream::Null())
|
||||
{
|
||||
GpuMat buffer;
|
||||
integral(src, sum, buffer, stream);
|
||||
}
|
||||
|
||||
//! computes squared integral image
|
||||
//! result matrix will have 64F type, but will contain 64U values
|
||||
//! supports source images of 8UC1 type only
|
||||
CV_EXPORTS void sqrIntegral(const GpuMat& src, GpuMat& sqsum, Stream& stream = Stream::Null());
|
||||
CV_EXPORTS void sqrIntegral(InputArray src, OutputArray sqsum, GpuMat& buf, Stream& stream = Stream::Null());
|
||||
static inline void sqrIntegral(InputArray src, OutputArray sqsum, Stream& stream = Stream::Null())
|
||||
{
|
||||
GpuMat buffer;
|
||||
sqrIntegral(src, sqsum, buffer, stream);
|
||||
}
|
||||
|
||||
//! implements generalized matrix product algorithm GEMM from BLAS
|
||||
CV_EXPORTS void gemm(const GpuMat& src1, const GpuMat& src2, double alpha,
|
||||
const GpuMat& src3, double beta, GpuMat& dst, int flags = 0, Stream& stream = Stream::Null());
|
||||
|
||||
//! performs per-element multiplication of two full (not packed) Fourier spectrums
|
||||
//! supports 32FC2 matrixes only (interleaved format)
|
||||
|
@ -265,7 +265,7 @@ PERF_TEST_P(Sz, Integral,
|
||||
cv::gpu::GpuMat dst;
|
||||
cv::gpu::GpuMat d_buf;
|
||||
|
||||
TEST_CYCLE() cv::gpu::integralBuffered(d_src, dst, d_buf);
|
||||
TEST_CYCLE() cv::gpu::integral(d_src, dst, d_buf);
|
||||
|
||||
GPU_SANITY_CHECK(dst);
|
||||
}
|
||||
@ -293,9 +293,9 @@ PERF_TEST_P(Sz, IntegralSqr,
|
||||
if (PERF_RUN_GPU())
|
||||
{
|
||||
const cv::gpu::GpuMat d_src(src);
|
||||
cv::gpu::GpuMat dst;
|
||||
cv::gpu::GpuMat dst, buf;
|
||||
|
||||
TEST_CYCLE() cv::gpu::sqrIntegral(d_src, dst);
|
||||
TEST_CYCLE() cv::gpu::sqrIntegral(d_src, dst, buf);
|
||||
|
||||
GPU_SANITY_CHECK(dst);
|
||||
}
|
||||
|
@ -108,9 +108,10 @@ PERF_TEST_P(Sz_Norm, NormDiff,
|
||||
{
|
||||
const cv::gpu::GpuMat d_src1(src1);
|
||||
const cv::gpu::GpuMat d_src2(src2);
|
||||
cv::gpu::GpuMat d_buf;
|
||||
double gpu_dst;
|
||||
|
||||
TEST_CYCLE() gpu_dst = cv::gpu::norm(d_src1, d_src2, normType);
|
||||
TEST_CYCLE() gpu_dst = cv::gpu::norm(d_src1, d_src2, d_buf, normType);
|
||||
|
||||
SANITY_CHECK(gpu_dst);
|
||||
|
||||
|
@ -49,11 +49,6 @@ using namespace cv::gpu;
|
||||
|
||||
void cv::gpu::gemm(const GpuMat&, const GpuMat&, double, const GpuMat&, double, GpuMat&, int, Stream&) { throw_no_cuda(); }
|
||||
|
||||
void cv::gpu::integral(const GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
|
||||
void cv::gpu::integralBuffered(const GpuMat&, GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
|
||||
|
||||
void cv::gpu::sqrIntegral(const GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
|
||||
|
||||
void cv::gpu::mulSpectrums(const GpuMat&, const GpuMat&, GpuMat&, int, bool, Stream&) { throw_no_cuda(); }
|
||||
void cv::gpu::mulAndScaleSpectrums(const GpuMat&, const GpuMat&, GpuMat&, int, float, bool, Stream&) { throw_no_cuda(); }
|
||||
|
||||
@ -294,116 +289,6 @@ void cv::gpu::gemm(const GpuMat& src1, const GpuMat& src2, double alpha, const G
|
||||
#endif
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// integral
|
||||
|
||||
void cv::gpu::integral(const GpuMat& src, GpuMat& sum, Stream& s)
|
||||
{
|
||||
GpuMat buffer;
|
||||
gpu::integralBuffered(src, sum, buffer, s);
|
||||
}
|
||||
|
||||
namespace cv { namespace gpu { namespace cudev
|
||||
{
|
||||
namespace imgproc
|
||||
{
|
||||
void shfl_integral_gpu(const PtrStepSzb& img, PtrStepSz<unsigned int> integral, cudaStream_t stream);
|
||||
}
|
||||
}}}
|
||||
|
||||
void cv::gpu::integralBuffered(const GpuMat& src, GpuMat& sum, GpuMat& buffer, Stream& s)
|
||||
{
|
||||
CV_Assert(src.type() == CV_8UC1);
|
||||
|
||||
cudaStream_t stream = StreamAccessor::getStream(s);
|
||||
|
||||
cv::Size whole;
|
||||
cv::Point offset;
|
||||
|
||||
src.locateROI(whole, offset);
|
||||
|
||||
if (deviceSupports(WARP_SHUFFLE_FUNCTIONS) && src.cols <= 2048
|
||||
&& offset.x % 16 == 0 && ((src.cols + 63) / 64) * 64 <= (static_cast<int>(src.step) - offset.x))
|
||||
{
|
||||
ensureSizeIsEnough(((src.rows + 7) / 8) * 8, ((src.cols + 63) / 64) * 64, CV_32SC1, buffer);
|
||||
|
||||
cv::gpu::cudev::imgproc::shfl_integral_gpu(src, buffer, stream);
|
||||
|
||||
sum.create(src.rows + 1, src.cols + 1, CV_32SC1);
|
||||
|
||||
sum.setTo(Scalar::all(0), s);
|
||||
|
||||
GpuMat inner = sum(Rect(1, 1, src.cols, src.rows));
|
||||
GpuMat res = buffer(Rect(0, 0, src.cols, src.rows));
|
||||
|
||||
res.copyTo(inner, s);
|
||||
}
|
||||
else
|
||||
{
|
||||
#ifndef HAVE_OPENCV_GPULEGACY
|
||||
throw_no_cuda();
|
||||
#else
|
||||
sum.create(src.rows + 1, src.cols + 1, CV_32SC1);
|
||||
|
||||
NcvSize32u roiSize;
|
||||
roiSize.width = src.cols;
|
||||
roiSize.height = src.rows;
|
||||
|
||||
cudaDeviceProp prop;
|
||||
cudaSafeCall( cudaGetDeviceProperties(&prop, cv::gpu::getDevice()) );
|
||||
|
||||
Ncv32u bufSize;
|
||||
ncvSafeCall( nppiStIntegralGetSize_8u32u(roiSize, &bufSize, prop) );
|
||||
ensureSizeIsEnough(1, bufSize, CV_8UC1, buffer);
|
||||
|
||||
NppStStreamHandler h(stream);
|
||||
|
||||
ncvSafeCall( nppiStIntegral_8u32u_C1R(const_cast<Ncv8u*>(src.ptr<Ncv8u>()), static_cast<int>(src.step),
|
||||
sum.ptr<Ncv32u>(), static_cast<int>(sum.step), roiSize, buffer.ptr<Ncv8u>(), bufSize, prop) );
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
// sqrIntegral
|
||||
|
||||
void cv::gpu::sqrIntegral(const GpuMat& src, GpuMat& sqsum, Stream& s)
|
||||
{
|
||||
#ifndef HAVE_OPENCV_GPULEGACY
|
||||
(void) src;
|
||||
(void) sqsum;
|
||||
(void) s;
|
||||
throw_no_cuda();
|
||||
#else
|
||||
CV_Assert(src.type() == CV_8U);
|
||||
|
||||
NcvSize32u roiSize;
|
||||
roiSize.width = src.cols;
|
||||
roiSize.height = src.rows;
|
||||
|
||||
cudaDeviceProp prop;
|
||||
cudaSafeCall( cudaGetDeviceProperties(&prop, cv::gpu::getDevice()) );
|
||||
|
||||
Ncv32u bufSize;
|
||||
ncvSafeCall(nppiStSqrIntegralGetSize_8u64u(roiSize, &bufSize, prop));
|
||||
GpuMat buf(1, bufSize, CV_8U);
|
||||
|
||||
cudaStream_t stream = StreamAccessor::getStream(s);
|
||||
|
||||
NppStStreamHandler h(stream);
|
||||
|
||||
sqsum.create(src.rows + 1, src.cols + 1, CV_64F);
|
||||
ncvSafeCall(nppiStSqrIntegral_8u64u_C1R(const_cast<Ncv8u*>(src.ptr<Ncv8u>(0)), static_cast<int>(src.step),
|
||||
sqsum.ptr<Ncv64u>(0), static_cast<int>(sqsum.step), roiSize, buf.ptr<Ncv8u>(0), bufSize, prop));
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
#endif
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
// mulSpectrums
|
||||
|
||||
@ -650,8 +535,6 @@ void cv::gpu::convolve(const GpuMat& image, const GpuMat& templ, GpuMat& result,
|
||||
(void) stream;
|
||||
throw_no_cuda();
|
||||
#else
|
||||
using namespace cv::gpu::cudev::imgproc;
|
||||
|
||||
CV_Assert(image.type() == CV_32F);
|
||||
CV_Assert(templ.type() == CV_32F);
|
||||
|
||||
|
@ -47,41 +47,28 @@ using namespace cv::gpu;
|
||||
|
||||
#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
|
||||
|
||||
double cv::gpu::norm(const GpuMat&, int) { throw_no_cuda(); return 0.0; }
|
||||
double cv::gpu::norm(const GpuMat&, int, GpuMat&) { throw_no_cuda(); return 0.0; }
|
||||
double cv::gpu::norm(const GpuMat&, int, const GpuMat&, GpuMat&) { throw_no_cuda(); return 0.0; }
|
||||
double cv::gpu::norm(const GpuMat&, const GpuMat&, int) { throw_no_cuda(); return 0.0; }
|
||||
double cv::gpu::norm(InputArray, int, InputArray, GpuMat&) { throw_no_cuda(); return 0.0; }
|
||||
double cv::gpu::norm(InputArray, InputArray, GpuMat&, int) { throw_no_cuda(); return 0.0; }
|
||||
|
||||
Scalar cv::gpu::sum(const GpuMat&) { throw_no_cuda(); return Scalar(); }
|
||||
Scalar cv::gpu::sum(const GpuMat&, GpuMat&) { throw_no_cuda(); return Scalar(); }
|
||||
Scalar cv::gpu::sum(const GpuMat&, const GpuMat&, GpuMat&) { throw_no_cuda(); return Scalar(); }
|
||||
Scalar cv::gpu::sum(InputArray, InputArray, GpuMat&) { throw_no_cuda(); return Scalar(); }
|
||||
Scalar cv::gpu::absSum(InputArray, InputArray, GpuMat&) { throw_no_cuda(); return Scalar(); }
|
||||
Scalar cv::gpu::sqrSum(InputArray, InputArray, GpuMat&) { throw_no_cuda(); return Scalar(); }
|
||||
|
||||
Scalar cv::gpu::absSum(const GpuMat&) { throw_no_cuda(); return Scalar(); }
|
||||
Scalar cv::gpu::absSum(const GpuMat&, GpuMat&) { throw_no_cuda(); return Scalar(); }
|
||||
Scalar cv::gpu::absSum(const GpuMat&, const GpuMat&, GpuMat&) { throw_no_cuda(); return Scalar(); }
|
||||
void cv::gpu::minMax(InputArray, double*, double*, InputArray, GpuMat&) { throw_no_cuda(); }
|
||||
void cv::gpu::minMaxLoc(InputArray, double*, double*, Point*, Point*, InputArray, GpuMat&, GpuMat&) { throw_no_cuda(); }
|
||||
|
||||
Scalar cv::gpu::sqrSum(const GpuMat&) { throw_no_cuda(); return Scalar(); }
|
||||
Scalar cv::gpu::sqrSum(const GpuMat&, GpuMat&) { throw_no_cuda(); return Scalar(); }
|
||||
Scalar cv::gpu::sqrSum(const GpuMat&, const GpuMat&, GpuMat&) { throw_no_cuda(); return Scalar(); }
|
||||
int cv::gpu::countNonZero(InputArray, GpuMat&) { throw_no_cuda(); return 0; }
|
||||
|
||||
void cv::gpu::minMax(const GpuMat&, double*, double*, const GpuMat&) { throw_no_cuda(); }
|
||||
void cv::gpu::minMax(const GpuMat&, double*, double*, const GpuMat&, GpuMat&) { throw_no_cuda(); }
|
||||
void cv::gpu::reduce(InputArray, OutputArray, int, int, int, Stream&) { throw_no_cuda(); }
|
||||
|
||||
void cv::gpu::minMaxLoc(const GpuMat&, double*, double*, Point*, Point*, const GpuMat&) { throw_no_cuda(); }
|
||||
void cv::gpu::minMaxLoc(const GpuMat&, double*, double*, Point*, Point*, const GpuMat&, GpuMat&, GpuMat&) { throw_no_cuda(); }
|
||||
void cv::gpu::meanStdDev(InputArray, Scalar&, Scalar&, GpuMat&) { throw_no_cuda(); }
|
||||
|
||||
int cv::gpu::countNonZero(const GpuMat&) { throw_no_cuda(); return 0; }
|
||||
int cv::gpu::countNonZero(const GpuMat&, GpuMat&) { throw_no_cuda(); return 0; }
|
||||
void cv::gpu::rectStdDev(InputArray, InputArray, OutputArray, Rect, Stream&) { throw_no_cuda(); }
|
||||
|
||||
void cv::gpu::reduce(const GpuMat&, GpuMat&, int, int, int, Stream&) { throw_no_cuda(); }
|
||||
void cv::gpu::normalize(InputArray, OutputArray, double, double, int, int, InputArray, GpuMat&, GpuMat&) { throw_no_cuda(); }
|
||||
|
||||
void cv::gpu::meanStdDev(const GpuMat&, Scalar&, Scalar&) { throw_no_cuda(); }
|
||||
void cv::gpu::meanStdDev(const GpuMat&, Scalar&, Scalar&, GpuMat&) { throw_no_cuda(); }
|
||||
|
||||
void cv::gpu::rectStdDev(const GpuMat&, const GpuMat&, GpuMat&, const Rect&, Stream&) { throw_no_cuda(); }
|
||||
|
||||
void cv::gpu::normalize(const GpuMat&, GpuMat&, double, double, int, int, const GpuMat&) { throw_no_cuda(); }
|
||||
void cv::gpu::normalize(const GpuMat&, GpuMat&, double, double, int, int, const GpuMat&, GpuMat&, GpuMat&) { throw_no_cuda(); }
|
||||
void cv::gpu::integral(InputArray, OutputArray, GpuMat&, Stream&) { throw_no_cuda(); }
|
||||
void cv::gpu::sqrIntegral(InputArray, OutputArray, GpuMat&, Stream&) { throw_no_cuda(); }
|
||||
|
||||
#else
|
||||
|
||||
@ -124,21 +111,13 @@ namespace
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// norm
|
||||
|
||||
double cv::gpu::norm(const GpuMat& src, int normType)
|
||||
double cv::gpu::norm(InputArray _src, int normType, InputArray _mask, GpuMat& buf)
|
||||
{
|
||||
GpuMat buf;
|
||||
return gpu::norm(src, normType, GpuMat(), buf);
|
||||
}
|
||||
GpuMat src = _src.getGpuMat();
|
||||
GpuMat mask = _mask.getGpuMat();
|
||||
|
||||
double cv::gpu::norm(const GpuMat& src, int normType, GpuMat& buf)
|
||||
{
|
||||
return gpu::norm(src, normType, GpuMat(), buf);
|
||||
}
|
||||
|
||||
double cv::gpu::norm(const GpuMat& src, int normType, const GpuMat& mask, GpuMat& buf)
|
||||
{
|
||||
CV_Assert(normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2);
|
||||
CV_Assert(mask.empty() || (mask.type() == CV_8UC1 && mask.size() == src.size() && src.channels() == 1));
|
||||
CV_Assert( normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2 );
|
||||
CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.size() == src.size() && src.channels() == 1) );
|
||||
|
||||
GpuMat src_single_channel = src.reshape(1);
|
||||
|
||||
@ -154,13 +133,11 @@ double cv::gpu::norm(const GpuMat& src, int normType, const GpuMat& mask, GpuMat
|
||||
return std::max(std::abs(min_val), std::abs(max_val));
|
||||
}
|
||||
|
||||
double cv::gpu::norm(const GpuMat& src1, const GpuMat& src2, int normType)
|
||||
double cv::gpu::norm(InputArray _src1, InputArray _src2, GpuMat& buf, int normType)
|
||||
{
|
||||
CV_Assert(src1.type() == CV_8UC1);
|
||||
CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
|
||||
CV_Assert(normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2);
|
||||
|
||||
#if CUDA_VERSION < 5050
|
||||
(void) buf;
|
||||
|
||||
typedef NppStatus (*func_t)(const Npp8u* pSrc1, int nSrcStep1, const Npp8u* pSrc2, int nSrcStep2, NppiSize oSizeROI, Npp64f* pRetVal);
|
||||
|
||||
static const func_t funcs[] = {nppiNormDiff_Inf_8u_C1R, nppiNormDiff_L1_8u_C1R, nppiNormDiff_L2_8u_C1R};
|
||||
@ -175,13 +152,18 @@ double cv::gpu::norm(const GpuMat& src1, const GpuMat& src2, int normType)
|
||||
static const buf_size_func_t buf_size_funcs[] = {nppiNormDiffInfGetBufferHostSize_8u_C1R, nppiNormDiffL1GetBufferHostSize_8u_C1R, nppiNormDiffL2GetBufferHostSize_8u_C1R};
|
||||
#endif
|
||||
|
||||
GpuMat src1 = _src1.getGpuMat();
|
||||
GpuMat src2 = _src2.getGpuMat();
|
||||
|
||||
CV_Assert( src1.type() == CV_8UC1 );
|
||||
CV_Assert( src1.size() == src2.size() && src1.type() == src2.type() );
|
||||
CV_Assert( normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2 );
|
||||
|
||||
NppiSize sz;
|
||||
sz.width = src1.cols;
|
||||
sz.height = src1.rows;
|
||||
|
||||
int funcIdx = normType >> 1;
|
||||
|
||||
double retVal;
|
||||
const int funcIdx = normType >> 1;
|
||||
|
||||
DeviceBuffer dbuf;
|
||||
|
||||
@ -191,13 +173,14 @@ double cv::gpu::norm(const GpuMat& src1, const GpuMat& src2, int normType)
|
||||
int bufSize;
|
||||
buf_size_funcs[funcIdx](sz, &bufSize);
|
||||
|
||||
GpuMat buf(1, bufSize, CV_8UC1);
|
||||
ensureSizeIsEnough(1, bufSize, CV_8UC1, buf);
|
||||
|
||||
nppSafeCall( funcs[funcIdx](src1.ptr<Npp8u>(), static_cast<int>(src1.step), src2.ptr<Npp8u>(), static_cast<int>(src2.step), sz, dbuf, buf.data) );
|
||||
#endif
|
||||
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
|
||||
double retVal;
|
||||
dbuf.download(&retVal);
|
||||
|
||||
return retVal;
|
||||
@ -220,19 +203,11 @@ namespace sum
|
||||
void runSqr(PtrStepSzb src, void* buf, double* sum, PtrStepSzb mask);
|
||||
}
|
||||
|
||||
Scalar cv::gpu::sum(const GpuMat& src)
|
||||
Scalar cv::gpu::sum(InputArray _src, InputArray _mask, GpuMat& buf)
|
||||
{
|
||||
GpuMat buf;
|
||||
return gpu::sum(src, GpuMat(), buf);
|
||||
}
|
||||
GpuMat src = _src.getGpuMat();
|
||||
GpuMat mask = _mask.getGpuMat();
|
||||
|
||||
Scalar cv::gpu::sum(const GpuMat& src, GpuMat& buf)
|
||||
{
|
||||
return gpu::sum(src, GpuMat(), buf);
|
||||
}
|
||||
|
||||
Scalar cv::gpu::sum(const GpuMat& src, const GpuMat& mask, GpuMat& buf)
|
||||
{
|
||||
typedef void (*func_t)(PtrStepSzb src, void* buf, double* sum, PtrStepSzb mask);
|
||||
static const func_t funcs[7][5] =
|
||||
{
|
||||
@ -266,19 +241,11 @@ Scalar cv::gpu::sum(const GpuMat& src, const GpuMat& mask, GpuMat& buf)
|
||||
return Scalar(result[0], result[1], result[2], result[3]);
|
||||
}
|
||||
|
||||
Scalar cv::gpu::absSum(const GpuMat& src)
|
||||
Scalar cv::gpu::absSum(InputArray _src, InputArray _mask, GpuMat& buf)
|
||||
{
|
||||
GpuMat buf;
|
||||
return gpu::absSum(src, GpuMat(), buf);
|
||||
}
|
||||
GpuMat src = _src.getGpuMat();
|
||||
GpuMat mask = _mask.getGpuMat();
|
||||
|
||||
Scalar cv::gpu::absSum(const GpuMat& src, GpuMat& buf)
|
||||
{
|
||||
return gpu::absSum(src, GpuMat(), buf);
|
||||
}
|
||||
|
||||
Scalar cv::gpu::absSum(const GpuMat& src, const GpuMat& mask, GpuMat& buf)
|
||||
{
|
||||
typedef void (*func_t)(PtrStepSzb src, void* buf, double* sum, PtrStepSzb mask);
|
||||
static const func_t funcs[7][5] =
|
||||
{
|
||||
@ -312,19 +279,11 @@ Scalar cv::gpu::absSum(const GpuMat& src, const GpuMat& mask, GpuMat& buf)
|
||||
return Scalar(result[0], result[1], result[2], result[3]);
|
||||
}
|
||||
|
||||
Scalar cv::gpu::sqrSum(const GpuMat& src)
|
||||
Scalar cv::gpu::sqrSum(InputArray _src, InputArray _mask, GpuMat& buf)
|
||||
{
|
||||
GpuMat buf;
|
||||
return gpu::sqrSum(src, GpuMat(), buf);
|
||||
}
|
||||
GpuMat src = _src.getGpuMat();
|
||||
GpuMat mask = _mask.getGpuMat();
|
||||
|
||||
Scalar cv::gpu::sqrSum(const GpuMat& src, GpuMat& buf)
|
||||
{
|
||||
return gpu::sqrSum(src, GpuMat(), buf);
|
||||
}
|
||||
|
||||
Scalar cv::gpu::sqrSum(const GpuMat& src, const GpuMat& mask, GpuMat& buf)
|
||||
{
|
||||
typedef void (*func_t)(PtrStepSzb src, void* buf, double* sum, PtrStepSzb mask);
|
||||
static const func_t funcs[7][5] =
|
||||
{
|
||||
@ -369,14 +328,11 @@ namespace minMax
|
||||
void run(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, PtrStepb buf);
|
||||
}
|
||||
|
||||
void cv::gpu::minMax(const GpuMat& src, double* minVal, double* maxVal, const GpuMat& mask)
|
||||
void cv::gpu::minMax(InputArray _src, double* minVal, double* maxVal, InputArray _mask, GpuMat& buf)
|
||||
{
|
||||
GpuMat buf;
|
||||
gpu::minMax(src, minVal, maxVal, mask, buf);
|
||||
}
|
||||
GpuMat src = _src.getGpuMat();
|
||||
GpuMat mask = _mask.getGpuMat();
|
||||
|
||||
void cv::gpu::minMax(const GpuMat& src, double* minVal, double* maxVal, const GpuMat& mask, GpuMat& buf)
|
||||
{
|
||||
typedef void (*func_t)(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, PtrStepb buf);
|
||||
static const func_t funcs[] =
|
||||
{
|
||||
@ -419,15 +375,12 @@ namespace minMaxLoc
|
||||
void run(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, int* minloc, int* maxloc, PtrStepb valbuf, PtrStep<unsigned int> locbuf);
|
||||
}
|
||||
|
||||
void cv::gpu::minMaxLoc(const GpuMat& src, double* minVal, double* maxVal, Point* minLoc, Point* maxLoc, const GpuMat& mask)
|
||||
void cv::gpu::minMaxLoc(InputArray _src, double* minVal, double* maxVal, Point* minLoc, Point* maxLoc,
|
||||
InputArray _mask, GpuMat& valBuf, GpuMat& locBuf)
|
||||
{
|
||||
GpuMat valBuf, locBuf;
|
||||
gpu::minMaxLoc(src, minVal, maxVal, minLoc, maxLoc, mask, valBuf, locBuf);
|
||||
}
|
||||
GpuMat src = _src.getGpuMat();
|
||||
GpuMat mask = _mask.getGpuMat();
|
||||
|
||||
void cv::gpu::minMaxLoc(const GpuMat& src, double* minVal, double* maxVal, Point* minLoc, Point* maxLoc,
|
||||
const GpuMat& mask, GpuMat& valBuf, GpuMat& locBuf)
|
||||
{
|
||||
typedef void (*func_t)(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, int* minloc, int* maxloc, PtrStepb valbuf, PtrStep<unsigned int> locbuf);
|
||||
static const func_t funcs[] =
|
||||
{
|
||||
@ -472,14 +425,10 @@ namespace countNonZero
|
||||
int run(const PtrStepSzb src, PtrStep<unsigned int> buf);
|
||||
}
|
||||
|
||||
int cv::gpu::countNonZero(const GpuMat& src)
|
||||
int cv::gpu::countNonZero(InputArray _src, GpuMat& buf)
|
||||
{
|
||||
GpuMat buf;
|
||||
return countNonZero(src, buf);
|
||||
}
|
||||
GpuMat src = _src.getGpuMat();
|
||||
|
||||
int cv::gpu::countNonZero(const GpuMat& src, GpuMat& buf)
|
||||
{
|
||||
typedef int (*func_t)(const PtrStepSzb src, PtrStep<unsigned int> buf);
|
||||
static const func_t funcs[] =
|
||||
{
|
||||
@ -521,8 +470,10 @@ namespace reduce
|
||||
void cols(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream);
|
||||
}
|
||||
|
||||
void cv::gpu::reduce(const GpuMat& src, GpuMat& dst, int dim, int reduceOp, int dtype, Stream& stream)
|
||||
void cv::gpu::reduce(InputArray _src, OutputArray _dst, int dim, int reduceOp, int dtype, Stream& stream)
|
||||
{
|
||||
GpuMat src = _src.getGpuMat();
|
||||
|
||||
CV_Assert( src.channels() <= 4 );
|
||||
CV_Assert( dim == 0 || dim == 1 );
|
||||
CV_Assert( reduceOp == REDUCE_SUM || reduceOp == REDUCE_AVG || reduceOp == REDUCE_MAX || reduceOp == REDUCE_MIN );
|
||||
@ -530,7 +481,8 @@ void cv::gpu::reduce(const GpuMat& src, GpuMat& dst, int dim, int reduceOp, int
|
||||
if (dtype < 0)
|
||||
dtype = src.depth();
|
||||
|
||||
dst.create(1, dim == 0 ? src.cols : src.rows, CV_MAKE_TYPE(CV_MAT_DEPTH(dtype), src.channels()));
|
||||
_dst.create(1, dim == 0 ? src.cols : src.rows, CV_MAKE_TYPE(CV_MAT_DEPTH(dtype), src.channels()));
|
||||
GpuMat dst = _dst.getGpuMat();
|
||||
|
||||
if (dim == 0)
|
||||
{
|
||||
@ -691,15 +643,11 @@ void cv::gpu::reduce(const GpuMat& src, GpuMat& dst, int dim, int reduceOp, int
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// meanStdDev
|
||||
|
||||
void cv::gpu::meanStdDev(const GpuMat& src, Scalar& mean, Scalar& stddev)
|
||||
void cv::gpu::meanStdDev(InputArray _src, Scalar& mean, Scalar& stddev, GpuMat& buf)
|
||||
{
|
||||
GpuMat buf;
|
||||
meanStdDev(src, mean, stddev, buf);
|
||||
}
|
||||
GpuMat src = _src.getGpuMat();
|
||||
|
||||
void cv::gpu::meanStdDev(const GpuMat& src, Scalar& mean, Scalar& stddev, GpuMat& buf)
|
||||
{
|
||||
CV_Assert(src.type() == CV_8UC1);
|
||||
CV_Assert( src.type() == CV_8UC1 );
|
||||
|
||||
if (!deviceSupports(FEATURE_SET_COMPUTE_13))
|
||||
CV_Error(cv::Error::StsNotImplemented, "Not sufficient compute capebility");
|
||||
@ -730,11 +678,15 @@ void cv::gpu::meanStdDev(const GpuMat& src, Scalar& mean, Scalar& stddev, GpuMat
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
// rectStdDev
|
||||
|
||||
void cv::gpu::rectStdDev(const GpuMat& src, const GpuMat& sqr, GpuMat& dst, const Rect& rect, Stream& s)
|
||||
void cv::gpu::rectStdDev(InputArray _src, InputArray _sqr, OutputArray _dst, Rect rect, Stream& _stream)
|
||||
{
|
||||
CV_Assert(src.type() == CV_32SC1 && sqr.type() == CV_64FC1);
|
||||
GpuMat src = _src.getGpuMat();
|
||||
GpuMat sqr = _sqr.getGpuMat();
|
||||
|
||||
dst.create(src.size(), CV_32FC1);
|
||||
CV_Assert( src.type() == CV_32SC1 && sqr.type() == CV_64FC1 );
|
||||
|
||||
_dst.create(src.size(), CV_32FC1);
|
||||
GpuMat dst = _dst.getGpuMat();
|
||||
|
||||
NppiSize sz;
|
||||
sz.width = src.cols;
|
||||
@ -746,7 +698,7 @@ void cv::gpu::rectStdDev(const GpuMat& src, const GpuMat& sqr, GpuMat& dst, cons
|
||||
nppRect.x = rect.x;
|
||||
nppRect.y = rect.y;
|
||||
|
||||
cudaStream_t stream = StreamAccessor::getStream(s);
|
||||
cudaStream_t stream = StreamAccessor::getStream(_stream);
|
||||
|
||||
NppStreamHandler h(stream);
|
||||
|
||||
@ -760,16 +712,12 @@ void cv::gpu::rectStdDev(const GpuMat& src, const GpuMat& sqr, GpuMat& dst, cons
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// normalize
|
||||
|
||||
void cv::gpu::normalize(const GpuMat& src, GpuMat& dst, double a, double b, int norm_type, int dtype, const GpuMat& mask)
|
||||
void cv::gpu::normalize(InputArray _src, OutputArray dst, double a, double b, int norm_type, int dtype, InputArray mask, GpuMat& norm_buf, GpuMat& cvt_buf)
|
||||
{
|
||||
GpuMat norm_buf;
|
||||
GpuMat cvt_buf;
|
||||
normalize(src, dst, a, b, norm_type, dtype, mask, norm_buf, cvt_buf);
|
||||
}
|
||||
GpuMat src = _src.getGpuMat();
|
||||
|
||||
void cv::gpu::normalize(const GpuMat& src, GpuMat& dst, double a, double b, int norm_type, int dtype, const GpuMat& mask, GpuMat& norm_buf, GpuMat& cvt_buf)
|
||||
{
|
||||
double scale = 1, shift = 0;
|
||||
|
||||
if (norm_type == NORM_MINMAX)
|
||||
{
|
||||
double smin = 0, smax = 0;
|
||||
@ -800,4 +748,116 @@ void cv::gpu::normalize(const GpuMat& src, GpuMat& dst, double a, double b, int
|
||||
}
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// integral
|
||||
|
||||
namespace cv { namespace gpu { namespace cudev
|
||||
{
|
||||
namespace imgproc
|
||||
{
|
||||
void shfl_integral_gpu(const PtrStepSzb& img, PtrStepSz<unsigned int> integral, cudaStream_t stream);
|
||||
}
|
||||
}}}
|
||||
|
||||
void cv::gpu::integral(InputArray _src, OutputArray _dst, GpuMat& buffer, Stream& _stream)
|
||||
{
|
||||
GpuMat src = _src.getGpuMat();
|
||||
|
||||
CV_Assert( src.type() == CV_8UC1 );
|
||||
|
||||
cudaStream_t stream = StreamAccessor::getStream(_stream);
|
||||
|
||||
cv::Size whole;
|
||||
cv::Point offset;
|
||||
src.locateROI(whole, offset);
|
||||
|
||||
if (deviceSupports(WARP_SHUFFLE_FUNCTIONS) && src.cols <= 2048
|
||||
&& offset.x % 16 == 0 && ((src.cols + 63) / 64) * 64 <= (static_cast<int>(src.step) - offset.x))
|
||||
{
|
||||
ensureSizeIsEnough(((src.rows + 7) / 8) * 8, ((src.cols + 63) / 64) * 64, CV_32SC1, buffer);
|
||||
|
||||
cv::gpu::cudev::imgproc::shfl_integral_gpu(src, buffer, stream);
|
||||
|
||||
_dst.create(src.rows + 1, src.cols + 1, CV_32SC1);
|
||||
GpuMat dst = _dst.getGpuMat();
|
||||
|
||||
dst.setTo(Scalar::all(0), _stream);
|
||||
|
||||
GpuMat inner = dst(Rect(1, 1, src.cols, src.rows));
|
||||
GpuMat res = buffer(Rect(0, 0, src.cols, src.rows));
|
||||
|
||||
res.copyTo(inner, _stream);
|
||||
}
|
||||
else
|
||||
{
|
||||
#ifndef HAVE_OPENCV_GPULEGACY
|
||||
throw_no_cuda();
|
||||
#else
|
||||
_dst.create(src.rows + 1, src.cols + 1, CV_32SC1);
|
||||
GpuMat dst = _dst.getGpuMat();
|
||||
|
||||
NcvSize32u roiSize;
|
||||
roiSize.width = src.cols;
|
||||
roiSize.height = src.rows;
|
||||
|
||||
cudaDeviceProp prop;
|
||||
cudaSafeCall( cudaGetDeviceProperties(&prop, cv::gpu::getDevice()) );
|
||||
|
||||
Ncv32u bufSize;
|
||||
ncvSafeCall( nppiStIntegralGetSize_8u32u(roiSize, &bufSize, prop) );
|
||||
ensureSizeIsEnough(1, bufSize, CV_8UC1, buffer);
|
||||
|
||||
NppStStreamHandler h(stream);
|
||||
|
||||
ncvSafeCall( nppiStIntegral_8u32u_C1R(const_cast<Ncv8u*>(src.ptr<Ncv8u>()), static_cast<int>(src.step),
|
||||
dst.ptr<Ncv32u>(), static_cast<int>(dst.step), roiSize, buffer.ptr<Ncv8u>(), bufSize, prop) );
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
// sqrIntegral
|
||||
|
||||
void cv::gpu::sqrIntegral(InputArray _src, OutputArray _dst, GpuMat& buf, Stream& _stream)
|
||||
{
|
||||
#ifndef HAVE_OPENCV_GPULEGACY
|
||||
(void) _src;
|
||||
(void) _dst;
|
||||
(void) _stream;
|
||||
throw_no_cuda();
|
||||
#else
|
||||
GpuMat src = _src.getGpuMat();
|
||||
|
||||
CV_Assert( src.type() == CV_8U );
|
||||
|
||||
NcvSize32u roiSize;
|
||||
roiSize.width = src.cols;
|
||||
roiSize.height = src.rows;
|
||||
|
||||
cudaDeviceProp prop;
|
||||
cudaSafeCall( cudaGetDeviceProperties(&prop, cv::gpu::getDevice()) );
|
||||
|
||||
Ncv32u bufSize;
|
||||
ncvSafeCall(nppiStSqrIntegralGetSize_8u64u(roiSize, &bufSize, prop));
|
||||
|
||||
ensureSizeIsEnough(1, bufSize, CV_8U, buf);
|
||||
|
||||
cudaStream_t stream = StreamAccessor::getStream(_stream);
|
||||
|
||||
NppStStreamHandler h(stream);
|
||||
|
||||
_dst.create(src.rows + 1, src.cols + 1, CV_64F);
|
||||
GpuMat dst = _dst.getGpuMat();
|
||||
|
||||
ncvSafeCall(nppiStSqrIntegral_8u64u_C1R(const_cast<Ncv8u*>(src.ptr<Ncv8u>(0)), static_cast<int>(src.step),
|
||||
dst.ptr<Ncv64u>(0), static_cast<int>(dst.step), roiSize, buf.ptr<Ncv8u>(0), bufSize, prop));
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
#endif
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -268,7 +268,7 @@ namespace
|
||||
buf.image_sums.resize(1);
|
||||
gpu::integral(image, buf.image_sums[0], stream);
|
||||
|
||||
unsigned int templ_sum = (unsigned int)sum(templ)[0];
|
||||
unsigned int templ_sum = (unsigned int)gpu::sum(templ)[0];
|
||||
matchTemplatePrepared_CCOFF_8U(templ.cols, templ.rows, buf.image_sums[0], templ_sum, result, StreamAccessor::getStream(stream));
|
||||
}
|
||||
else
|
||||
|
@ -142,13 +142,13 @@ namespace
|
||||
|
||||
bindImgTex(img);
|
||||
|
||||
gpu::integralBuffered(img, surf_.sum, surf_.intBuffer);
|
||||
gpu::integral(img, surf_.sum, surf_.intBuffer);
|
||||
sumOffset = bindSumTex(surf_.sum);
|
||||
|
||||
if (use_mask)
|
||||
{
|
||||
gpu::min(mask, 1.0, surf_.mask1);
|
||||
gpu::integralBuffered(surf_.mask1, surf_.maskSum, surf_.intBuffer);
|
||||
gpu::integral(surf_.mask1, surf_.maskSum, surf_.intBuffer);
|
||||
maskOffset = bindMaskSumTex(surf_.maskSum);
|
||||
}
|
||||
}
|
||||
|
@ -138,7 +138,7 @@ void Worker::operator()(int device_id) const
|
||||
gpu::transpose(d_src, d_dst);
|
||||
|
||||
// Check results
|
||||
bool passed = norm(dst - Mat(d_dst), NORM_INF) < 1e-3;
|
||||
bool passed = cv::norm(dst - Mat(d_dst), NORM_INF) < 1e-3;
|
||||
std::cout << "GPU #" << device_id << " (" << DeviceInfo().name() << "): "
|
||||
<< (passed ? "passed" : "FAILED") << endl;
|
||||
|
||||
|
@ -22,9 +22,9 @@ inline T mapVal(T x, T a, T b, T c, T d)
|
||||
static void colorizeFlow(const Mat &u, const Mat &v, Mat &dst)
|
||||
{
|
||||
double uMin, uMax;
|
||||
minMaxLoc(u, &uMin, &uMax, 0, 0);
|
||||
cv::minMaxLoc(u, &uMin, &uMax, 0, 0);
|
||||
double vMin, vMax;
|
||||
minMaxLoc(v, &vMin, &vMax, 0, 0);
|
||||
cv::minMaxLoc(v, &vMin, &vMax, 0, 0);
|
||||
uMin = ::abs(uMin); uMax = ::abs(uMax);
|
||||
vMin = ::abs(vMin); vMax = ::abs(vMax);
|
||||
float dMax = static_cast<float>(::max(::max(uMin, uMax), ::max(vMin, vMax)));
|
||||
|
@ -95,7 +95,7 @@ void Worker::operator()(int device_id) const
|
||||
gpu::transpose(d_src, d_dst);
|
||||
|
||||
// Check results
|
||||
bool passed = norm(dst - Mat(d_dst), NORM_INF) < 1e-3;
|
||||
bool passed = cv::norm(dst - Mat(d_dst), NORM_INF) < 1e-3;
|
||||
std::cout << "GPU #" << device_id << " (" << DeviceInfo().name() << "): "
|
||||
<< (passed ? "passed" : "FAILED") << endl;
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user