update docs

minor fixes and refactoring of GPU module
This commit is contained in:
Vladislav Vinogradov 2011-02-16 08:31:45 +00:00
parent 7d42dbdd71
commit 54fa600b9e
16 changed files with 944 additions and 901 deletions

View File

@ -1,7 +1,55 @@
\section{Feature Detection and Description} \section{Feature Detection and Description}
\cvclass{gpu::SURF\_GPU} \cvclass{gpu::SURFParams\_GPU}\label{class.gpu.SURFParams}
Various SURF algorithm parameters.
\begin{lstlisting}
//! Parameter set for SURF_GPU; the default constructor below supplies the default value of every field.
struct SURFParams_GPU
{
SURFParams_GPU() : threshold(0.1f), nOctaves(4), nIntervals(4),
initialScale(2.f), l1(3.f/1.5f), l2(5.f/1.5f), l3(3.f/1.5f),
l4(1.f/1.5f), edgeScale(0.81f), initialStep(1), extended(true),
featuresRatio(0.01f) {}
//! The interest operator threshold (default 0.1)
float threshold;
//! The number of octaves to process (default 4)
int nOctaves;
//! The number of intervals in each octave (default 4)
int nIntervals;
//! The scale associated with the first interval of the first octave (default 2)
float initialScale;
//! Mask parameter l_1 (default 3/1.5)
float l1;
//! Mask parameter l_2 (default 5/1.5)
float l2;
//! Mask parameter l_3 (default 3/1.5)
float l3;
//! Mask parameter l_4 (default 1/1.5)
float l4;
//! The amount by which the edge rejection mask is scaled (default 0.81)
float edgeScale;
//! The initial sampling step in pixels (default 1)
int initialStep;
//! If true, descriptors of length 128 are generated; if false, descriptors of length 64 (default true)
bool extended;
//! Upper bound on the number of features: featuresRatio * img.size().area() (default ratio 0.01)
float featuresRatio;
};
\end{lstlisting}
In contrast to \hyperref[cv.class.SURF]{cv::SURF}, \texttt{SURF\_GPU} works with float sources (in the range [0..1]). It performs the conversion after calculating the integral image, by dividing the result by 255. Please take this into consideration when changing some parameters (like the hessian threshold).
Current \texttt{SURF\_GPU} implementation supports the number of intervals in each octave in range [3..21].
See also: \hyperref[class.gpu.SURF]{cv::gpu::SURF\_GPU}.
\cvclass{gpu::SURF\_GPU}\label{class.gpu.SURF}
Class for extracting Speeded Up Robust Features from an image. Class for extracting Speeded Up Robust Features from an image.
\begin{lstlisting} \begin{lstlisting}
@ -62,7 +110,7 @@ The class \texttt{SURF\_GPU} can store results to GPU and CPU memory and provide
The class \texttt{SURF\_GPU} uses some buffers and provides access to them. All buffers can be safely released between function calls. The class \texttt{SURF\_GPU} uses some buffers and provides access to them. All buffers can be safely released between function calls.
See also: \hyperref[cv.class.SURF]{cv::SURF}. See also: \hyperref[cv.class.SURF]{cv::SURF}, \hyperref[class.gpu.SURFParams]{cv::gpu::SURFParams\_GPU}.
\cvclass{gpu::BruteForceMatcher\_GPU} \cvclass{gpu::BruteForceMatcher\_GPU}
@ -269,7 +317,7 @@ void radiusMatch(const GpuMat\& queryDescs, \par const GpuMat\& trainDescs, \par
void radiusMatch(const GpuMat\& queryDescs, \par std::vector< std::vector<DMatch> >\& matches, \par float maxDistance, \par const std::vector<GpuMat>\& masks = std::vector<GpuMat>(), \par bool compactResult = false); void radiusMatch(const GpuMat\& queryDescs, \par std::vector< std::vector<DMatch> >\& matches, \par float maxDistance, \par const std::vector<GpuMat>\& masks = std::vector<GpuMat>(), \par bool compactResult = false);
} }
This function works only on devices with Compute Capability $>=$ 1.1. \textbf{Please note:} This function works only on devices with Compute Capability $>=$ 1.1.
See also: \cvCppCross{DescriptorMatcher::radiusMatch}. See also: \cvCppCross{DescriptorMatcher::radiusMatch}.
@ -293,7 +341,8 @@ void radiusMatch(const GpuMat\& queryDescs, \par const GpuMat\& trainDescs, \par
In contrast to \hyperref[cppfunc.gpu.BruteForceMatcher.radiusMatch]{cv::gpu::BruteForceMatcher\_GPU::radiusMatch}, results are not sorted in order of increasing distance. In contrast to \hyperref[cppfunc.gpu.BruteForceMatcher.radiusMatch]{cv::gpu::BruteForceMatcher\_GPU::radiusMatch}, results are not sorted in order of increasing distance.
This function works only on devices with Compute Capability $>=$ 1.1. \textbf{Please note:} This function works only on devices with Compute Capability $>=$ 1.1.
\cvfunc{cv::gpu::BruteForceMatcher\_GPU::radiusMatchDownload}\label{cppfunc.gpu.BruteForceMatcher.radiusMatchDownload} \cvfunc{cv::gpu::BruteForceMatcher\_GPU::radiusMatchDownload}\label{cppfunc.gpu.BruteForceMatcher.radiusMatchDownload}
Downloads \texttt{trainIdx}, \texttt{nMatches} and \texttt{distance} matrices obtained via \hyperref[cppfunc.gpu.BruteForceMatcher.radiusMatchSingle]{radiusMatch} to CPU vector with \hyperref[cv.class.DMatch]{cv::DMatch}. If \texttt{compactResult} is true \texttt{matches} vector will not contain matches for fully masked out query descriptors. Downloads \texttt{trainIdx}, \texttt{nMatches} and \texttt{distance} matrices obtained via \hyperref[cppfunc.gpu.BruteForceMatcher.radiusMatchSingle]{radiusMatch} to CPU vector with \hyperref[cv.class.DMatch]{cv::DMatch}. If \texttt{compactResult} is true \texttt{matches} vector will not contain matches for fully masked out query descriptors.

View File

@ -17,6 +17,8 @@ Performs mean-shift filtering for each point of the source image. It maps each p
\cvarg{criteria}{Termination criteria. See \hyperref[TermCriteria]{cv::TermCriteria}.} \cvarg{criteria}{Termination criteria. See \hyperref[TermCriteria]{cv::TermCriteria}.}
\end{description} \end{description}
\textbf{Please note:} This function works only on devices with Compute Capability $>=$ 1.2.
\cvCppFunc{gpu::meanShiftProc} \cvCppFunc{gpu::meanShiftProc}
Performs mean-shift procedure and stores information about processed points (i.e. their colors and positions) into two images. Performs mean-shift procedure and stores information about processed points (i.e. their colors and positions) into two images.
@ -35,6 +37,8 @@ Performs mean-shift procedure and stores information about processed points (i.e
\cvarg{criteria}{Termination criteria. See \hyperref[TermCriteria]{cv::TermCriteria}.} \cvarg{criteria}{Termination criteria. See \hyperref[TermCriteria]{cv::TermCriteria}.}
\end{description} \end{description}
\textbf{Please note:} This function works only on devices with Compute Capability $>=$ 1.2.
See also: \cvCppCross{gpu::meanShiftFiltering}. See also: \cvCppCross{gpu::meanShiftFiltering}.
@ -55,6 +59,8 @@ Performs mean-shift segmentation of the source image and eleminates small segmen
\cvarg{criteria}{Termination criteria. See \hyperref[TermCriteria]{cv::TermCriteria}.} \cvarg{criteria}{Termination criteria. See \hyperref[TermCriteria]{cv::TermCriteria}.}
\end{description} \end{description}
\textbf{Please note:} This function works only on devices with Compute Capability $>=$ 1.2.
\cvCppFunc{gpu::integral} \cvCppFunc{gpu::integral}
Computes integral image and squared integral image. Computes integral image and squared integral image.
@ -319,7 +325,7 @@ double threshold(const GpuMat\& src, GpuMat\& dst, double thresh, \par double ma
} }
\begin{description} \begin{description}
\cvarg{src}{Source array (single-channel, \texttt{CV\_64F} depth isn't supported).} \cvarg{src}{Source array (single-channel).}
\cvarg{dst}{Destination array; will have the same size and the same type as \texttt{src}.} \cvarg{dst}{Destination array; will have the same size and the same type as \texttt{src}.}
\cvarg{thresh}{Threshold value.} \cvarg{thresh}{Threshold value.}
\cvarg{maxVal}{Maximum value to use with \texttt{THRESH\_BINARY} and \texttt{THRESH\_BINARY\_INV} thresholding types.} \cvarg{maxVal}{Maximum value to use with \texttt{THRESH\_BINARY} and \texttt{THRESH\_BINARY\_INV} thresholding types.}

Binary file not shown.

View File

@ -582,10 +582,10 @@ namespace cv { namespace gpu { namespace bfmatcher
} }
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
// Match kernel chooser // Match caller
template <typename Dist, typename T, typename Train, typename Mask> template <typename Dist, typename T, typename Train, typename Mask>
void match_chooser(const DevMem2D_<T>& queryDescs, const Train& train, void matchDispatcher(const DevMem2D_<T>& queryDescs, const Train& train,
const Mask& mask, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const Mask& mask, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance,
bool cc_12) bool cc_12)
{ {
@ -616,11 +616,11 @@ namespace cv { namespace gpu { namespace bfmatcher
if (mask.data) if (mask.data)
{ {
SingleMask m(mask); SingleMask m(mask);
match_chooser<L1Dist>((DevMem2D_<T>)queryDescs, train, m, trainIdx, imgIdx, distance, cc_12); matchDispatcher<L1Dist>((DevMem2D_<T>)queryDescs, train, m, trainIdx, imgIdx, distance, cc_12);
} }
else else
{ {
match_chooser<L1Dist>((DevMem2D_<T>)queryDescs, train, WithOutMask(), trainIdx, imgIdx, distance, cc_12); matchDispatcher<L1Dist>((DevMem2D_<T>)queryDescs, train, WithOutMask(), trainIdx, imgIdx, distance, cc_12);
} }
} }
@ -640,11 +640,11 @@ namespace cv { namespace gpu { namespace bfmatcher
if (mask.data) if (mask.data)
{ {
SingleMask m(mask); SingleMask m(mask);
match_chooser<L2Dist>((DevMem2D_<T>)queryDescs, train, m, trainIdx, imgIdx, distance, cc_12); matchDispatcher<L2Dist>((DevMem2D_<T>)queryDescs, train, m, trainIdx, imgIdx, distance, cc_12);
} }
else else
{ {
match_chooser<L2Dist>((DevMem2D_<T>)queryDescs, train, WithOutMask(), trainIdx, imgIdx, distance, cc_12); matchDispatcher<L2Dist>((DevMem2D_<T>)queryDescs, train, WithOutMask(), trainIdx, imgIdx, distance, cc_12);
} }
} }
@ -664,11 +664,11 @@ namespace cv { namespace gpu { namespace bfmatcher
if (maskCollection.data) if (maskCollection.data)
{ {
MaskCollection mask(maskCollection.data); MaskCollection mask(maskCollection.data);
match_chooser<L1Dist>((DevMem2D_<T>)queryDescs, train, mask, trainIdx, imgIdx, distance, cc_12); matchDispatcher<L1Dist>((DevMem2D_<T>)queryDescs, train, mask, trainIdx, imgIdx, distance, cc_12);
} }
else else
{ {
match_chooser<L1Dist>((DevMem2D_<T>)queryDescs, train, WithOutMask(), trainIdx, imgIdx, distance, cc_12); matchDispatcher<L1Dist>((DevMem2D_<T>)queryDescs, train, WithOutMask(), trainIdx, imgIdx, distance, cc_12);
} }
} }
@ -688,11 +688,11 @@ namespace cv { namespace gpu { namespace bfmatcher
if (maskCollection.data) if (maskCollection.data)
{ {
MaskCollection mask(maskCollection.data); MaskCollection mask(maskCollection.data);
match_chooser<L2Dist>((DevMem2D_<T>)queryDescs, train, mask, trainIdx, imgIdx, distance, cc_12); matchDispatcher<L2Dist>((DevMem2D_<T>)queryDescs, train, mask, trainIdx, imgIdx, distance, cc_12);
} }
else else
{ {
match_chooser<L2Dist>((DevMem2D_<T>)queryDescs, train, WithOutMask(), trainIdx, imgIdx, distance, cc_12); matchDispatcher<L2Dist>((DevMem2D_<T>)queryDescs, train, WithOutMask(), trainIdx, imgIdx, distance, cc_12);
} }
} }
@ -942,22 +942,35 @@ namespace cv { namespace gpu { namespace bfmatcher
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
// knn match caller // knn match caller
// Single entry point for the distance-matrix computation used by the knn match callers.
// Dist - distance functor type, T - descriptor element type, Mask - mask policy type.
// Forwards all arguments unchanged to calcDistance_caller with its first two
// template arguments fixed to 16 and 16.
// NOTE(review): the 16, 16 pair is presumably the CUDA block size - confirm against calcDistance_caller.
template <typename Dist, typename T, typename Mask>
void calcDistanceDispatcher(const DevMem2D_<T>& queryDescs, const DevMem2D_<T>& trainDescs,
const Mask& mask, const DevMem2Df& allDist)
{
calcDistance_caller<16, 16, Dist>(queryDescs, trainDescs, mask, allDist);
}
// Single entry point for the knn selection step that follows the distance computation.
// Forwards knn, trainIdx, distance and allDist unchanged to findKnnMatch_caller
// with its template argument fixed to 256.
// NOTE(review): 256 is presumably the CUDA block size - confirm against findKnnMatch_caller.
void findKnnMatchDispatcher(int knn, const DevMem2Di& trainIdx, const DevMem2Df& distance,
const DevMem2Df& allDist)
{
findKnnMatch_caller<256>(knn, trainIdx, distance, allDist);
}
template <typename T> template <typename T>
void knnMatchL1_gpu(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn, void knnMatchL1_gpu(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn,
const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist) const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist)
{ {
if (mask.data) if (mask.data)
{ {
calcDistance_caller<16, 16, L1Dist>((DevMem2D_<T>)queryDescs, (DevMem2D_<T>)trainDescs, calcDistanceDispatcher<L1Dist>((DevMem2D_<T>)queryDescs, (DevMem2D_<T>)trainDescs,
SingleMask(mask), allDist); SingleMask(mask), allDist);
} }
else else
{ {
calcDistance_caller<16, 16, L1Dist>((DevMem2D_<T>)queryDescs, (DevMem2D_<T>)trainDescs, calcDistanceDispatcher<L1Dist>((DevMem2D_<T>)queryDescs, (DevMem2D_<T>)trainDescs,
WithOutMask(), allDist); WithOutMask(), allDist);
} }
findKnnMatch_caller<256>(knn, trainIdx, distance, allDist); findKnnMatchDispatcher(knn, trainIdx, distance, allDist);
} }
template void knnMatchL1_gpu<uchar >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist); template void knnMatchL1_gpu<uchar >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist);
@ -973,16 +986,16 @@ namespace cv { namespace gpu { namespace bfmatcher
{ {
if (mask.data) if (mask.data)
{ {
calcDistance_caller<16, 16, L2Dist>((DevMem2D_<T>)queryDescs, (DevMem2D_<T>)trainDescs, calcDistanceDispatcher<L2Dist>((DevMem2D_<T>)queryDescs, (DevMem2D_<T>)trainDescs,
SingleMask(mask), allDist); SingleMask(mask), allDist);
} }
else else
{ {
calcDistance_caller<16, 16, L2Dist>((DevMem2D_<T>)queryDescs, (DevMem2D_<T>)trainDescs, calcDistanceDispatcher<L2Dist>((DevMem2D_<T>)queryDescs, (DevMem2D_<T>)trainDescs,
WithOutMask(), allDist); WithOutMask(), allDist);
} }
findKnnMatch_caller<256>(knn, trainIdx, distance, allDist); findKnnMatchDispatcher(knn, trainIdx, distance, allDist);
} }
template void knnMatchL2_gpu<uchar >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist); template void knnMatchL2_gpu<uchar >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist);
@ -1061,7 +1074,16 @@ namespace cv { namespace gpu { namespace bfmatcher
} }
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
// Radius Match kernel chooser // Radius Match caller
// Single entry point shared by the L1 and L2 radius match callers.
// Dist - distance functor type, T - descriptor element type, Mask - mask policy type.
// Forwards all arguments unchanged to radiusMatch_caller with its first two
// template arguments fixed to 16 and 16.
// NOTE(review): the 16, 16 pair is presumably the CUDA block size - confirm against radiusMatch_caller.
template <typename Dist, typename T, typename Mask>
void radiusMatchDispatcher(const DevMem2D_<T>& queryDescs, const DevMem2D_<T>& trainDescs,
float maxDistance, const Mask& mask, const DevMem2Di& trainIdx, unsigned int* nMatches,
const DevMem2Df& distance)
{
radiusMatch_caller<16, 16, Dist>(queryDescs, trainDescs, maxDistance, mask,
trainIdx, nMatches, distance);
}
template <typename T> template <typename T>
void radiusMatchL1_gpu(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, void radiusMatchL1_gpu(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance,
@ -1069,12 +1091,12 @@ namespace cv { namespace gpu { namespace bfmatcher
{ {
if (mask.data) if (mask.data)
{ {
radiusMatch_caller<16, 16, L1Dist>((DevMem2D_<T>)queryDescs, (DevMem2D_<T>)trainDescs, radiusMatchDispatcher<L1Dist>((DevMem2D_<T>)queryDescs, (DevMem2D_<T>)trainDescs,
maxDistance, SingleMask(mask), trainIdx, nMatches, distance); maxDistance, SingleMask(mask), trainIdx, nMatches, distance);
} }
else else
{ {
radiusMatch_caller<16, 16, L1Dist>((DevMem2D_<T>)queryDescs, (DevMem2D_<T>)trainDescs, radiusMatchDispatcher<L1Dist>((DevMem2D_<T>)queryDescs, (DevMem2D_<T>)trainDescs,
maxDistance, WithOutMask(), trainIdx, nMatches, distance); maxDistance, WithOutMask(), trainIdx, nMatches, distance);
} }
} }
@ -1092,12 +1114,12 @@ namespace cv { namespace gpu { namespace bfmatcher
{ {
if (mask.data) if (mask.data)
{ {
radiusMatch_caller<16, 16, L2Dist>((DevMem2D_<T>)queryDescs, (DevMem2D_<T>)trainDescs, radiusMatchDispatcher<L2Dist>((DevMem2D_<T>)queryDescs, (DevMem2D_<T>)trainDescs,
maxDistance, SingleMask(mask), trainIdx, nMatches, distance); maxDistance, SingleMask(mask), trainIdx, nMatches, distance);
} }
else else
{ {
radiusMatch_caller<16, 16, L2Dist>((DevMem2D_<T>)queryDescs, (DevMem2D_<T>)trainDescs, radiusMatchDispatcher<L2Dist>((DevMem2D_<T>)queryDescs, (DevMem2D_<T>)trainDescs,
maxDistance, WithOutMask(), trainIdx, nMatches, distance); maxDistance, WithOutMask(), trainIdx, nMatches, distance);
} }
} }

File diff suppressed because it is too large Load Diff

View File

@ -190,6 +190,9 @@ void cv::gpu::Stream::enqueueCopy(const GpuMat& src, GpuMat& dst) { devcopy(src,
void cv::gpu::Stream::enqueueMemSet(GpuMat& src, Scalar val) void cv::gpu::Stream::enqueueMemSet(GpuMat& src, Scalar val)
{ {
CV_Assert((src.depth() != CV_64F) ||
(TargetArchs::builtWith(NATIVE_DOUBLE) && DeviceInfo().supports(NATIVE_DOUBLE)));
typedef void (*set_caller_t)(GpuMat& src, const Scalar& s, cudaStream_t stream); typedef void (*set_caller_t)(GpuMat& src, const Scalar& s, cudaStream_t stream);
static const set_caller_t set_callers[] = static const set_caller_t set_callers[] =
{ {
@ -201,6 +204,11 @@ void cv::gpu::Stream::enqueueMemSet(GpuMat& src, Scalar val)
void cv::gpu::Stream::enqueueMemSet(GpuMat& src, Scalar val, const GpuMat& mask) void cv::gpu::Stream::enqueueMemSet(GpuMat& src, Scalar val, const GpuMat& mask)
{ {
CV_Assert((src.depth() != CV_64F) ||
(TargetArchs::builtWith(NATIVE_DOUBLE) && DeviceInfo().supports(NATIVE_DOUBLE)));
CV_Assert(mask.type() == CV_8UC1);
typedef void (*set_caller_t)(GpuMat& src, const Scalar& s, const GpuMat& mask, cudaStream_t stream); typedef void (*set_caller_t)(GpuMat& src, const Scalar& s, const GpuMat& mask, cudaStream_t stream);
static const set_caller_t set_callers[] = static const set_caller_t set_callers[] =
{ {
@ -212,6 +220,9 @@ void cv::gpu::Stream::enqueueMemSet(GpuMat& src, Scalar val, const GpuMat& mask)
void cv::gpu::Stream::enqueueConvert(const GpuMat& src, GpuMat& dst, int rtype, double alpha, double beta) void cv::gpu::Stream::enqueueConvert(const GpuMat& src, GpuMat& dst, int rtype, double alpha, double beta)
{ {
CV_Assert((src.depth() != CV_64F && CV_MAT_DEPTH(rtype) != CV_64F) ||
(TargetArchs::builtWith(NATIVE_DOUBLE) && DeviceInfo().supports(NATIVE_DOUBLE)));
bool noScale = fabs(alpha-1) < std::numeric_limits<double>::epsilon() && fabs(beta) < std::numeric_limits<double>::epsilon(); bool noScale = fabs(alpha-1) < std::numeric_limits<double>::epsilon() && fabs(beta) < std::numeric_limits<double>::epsilon();
if( rtype < 0 ) if( rtype < 0 )

View File

@ -626,6 +626,10 @@ namespace
void cv::gpu::min(const GpuMat& src1, const GpuMat& src2, GpuMat& dst) void cv::gpu::min(const GpuMat& src1, const GpuMat& src2, GpuMat& dst)
{ {
CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
CV_Assert((src1.depth() != CV_64F) ||
(TargetArchs::builtWith(NATIVE_DOUBLE) && DeviceInfo().supports(NATIVE_DOUBLE)));
typedef void (*func_t)(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cudaStream_t stream); typedef void (*func_t)(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cudaStream_t stream);
static const func_t funcs[] = static const func_t funcs[] =
{ {
@ -637,6 +641,10 @@ void cv::gpu::min(const GpuMat& src1, const GpuMat& src2, GpuMat& dst)
void cv::gpu::min(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const Stream& stream) void cv::gpu::min(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const Stream& stream)
{ {
CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
CV_Assert((src1.depth() != CV_64F) ||
(TargetArchs::builtWith(NATIVE_DOUBLE) && DeviceInfo().supports(NATIVE_DOUBLE)));
typedef void (*func_t)(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cudaStream_t stream); typedef void (*func_t)(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cudaStream_t stream);
static const func_t funcs[] = static const func_t funcs[] =
{ {
@ -648,6 +656,9 @@ void cv::gpu::min(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const Str
void cv::gpu::min(const GpuMat& src1, double src2, GpuMat& dst) void cv::gpu::min(const GpuMat& src1, double src2, GpuMat& dst)
{ {
CV_Assert((src1.depth() != CV_64F) ||
(TargetArchs::builtWith(NATIVE_DOUBLE) && DeviceInfo().supports(NATIVE_DOUBLE)));
typedef void (*func_t)(const GpuMat& src1, double src2, GpuMat& dst, cudaStream_t stream); typedef void (*func_t)(const GpuMat& src1, double src2, GpuMat& dst, cudaStream_t stream);
static const func_t funcs[] = static const func_t funcs[] =
{ {
@ -659,6 +670,9 @@ void cv::gpu::min(const GpuMat& src1, double src2, GpuMat& dst)
void cv::gpu::min(const GpuMat& src1, double src2, GpuMat& dst, const Stream& stream) void cv::gpu::min(const GpuMat& src1, double src2, GpuMat& dst, const Stream& stream)
{ {
CV_Assert((src1.depth() != CV_64F) ||
(TargetArchs::builtWith(NATIVE_DOUBLE) && DeviceInfo().supports(NATIVE_DOUBLE)));
typedef void (*func_t)(const GpuMat& src1, double src2, GpuMat& dst, cudaStream_t stream); typedef void (*func_t)(const GpuMat& src1, double src2, GpuMat& dst, cudaStream_t stream);
static const func_t funcs[] = static const func_t funcs[] =
{ {
@ -670,6 +684,10 @@ void cv::gpu::min(const GpuMat& src1, double src2, GpuMat& dst, const Stream& st
void cv::gpu::max(const GpuMat& src1, const GpuMat& src2, GpuMat& dst) void cv::gpu::max(const GpuMat& src1, const GpuMat& src2, GpuMat& dst)
{ {
CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
CV_Assert((src1.depth() != CV_64F) ||
(TargetArchs::builtWith(NATIVE_DOUBLE) && DeviceInfo().supports(NATIVE_DOUBLE)));
typedef void (*func_t)(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cudaStream_t stream); typedef void (*func_t)(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cudaStream_t stream);
static const func_t funcs[] = static const func_t funcs[] =
{ {
@ -681,6 +699,10 @@ void cv::gpu::max(const GpuMat& src1, const GpuMat& src2, GpuMat& dst)
void cv::gpu::max(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const Stream& stream) void cv::gpu::max(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const Stream& stream)
{ {
CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
CV_Assert((src1.depth() != CV_64F) ||
(TargetArchs::builtWith(NATIVE_DOUBLE) && DeviceInfo().supports(NATIVE_DOUBLE)));
typedef void (*func_t)(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cudaStream_t stream); typedef void (*func_t)(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cudaStream_t stream);
static const func_t funcs[] = static const func_t funcs[] =
{ {
@ -692,6 +714,9 @@ void cv::gpu::max(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const Str
void cv::gpu::max(const GpuMat& src1, double src2, GpuMat& dst) void cv::gpu::max(const GpuMat& src1, double src2, GpuMat& dst)
{ {
CV_Assert((src1.depth() != CV_64F) ||
(TargetArchs::builtWith(NATIVE_DOUBLE) && DeviceInfo().supports(NATIVE_DOUBLE)));
typedef void (*func_t)(const GpuMat& src1, double src2, GpuMat& dst, cudaStream_t stream); typedef void (*func_t)(const GpuMat& src1, double src2, GpuMat& dst, cudaStream_t stream);
static const func_t funcs[] = static const func_t funcs[] =
{ {
@ -703,6 +728,9 @@ void cv::gpu::max(const GpuMat& src1, double src2, GpuMat& dst)
void cv::gpu::max(const GpuMat& src1, double src2, GpuMat& dst, const Stream& stream) void cv::gpu::max(const GpuMat& src1, double src2, GpuMat& dst, const Stream& stream)
{ {
CV_Assert((src1.depth() != CV_64F) ||
(TargetArchs::builtWith(NATIVE_DOUBLE) && DeviceInfo().supports(NATIVE_DOUBLE)));
typedef void (*func_t)(const GpuMat& src1, double src2, GpuMat& dst, cudaStream_t stream); typedef void (*func_t)(const GpuMat& src1, double src2, GpuMat& dst, cudaStream_t stream);
static const func_t funcs[] = static const func_t funcs[] =
{ {
@ -749,6 +777,9 @@ double cv::gpu::threshold(const GpuMat& src, GpuMat& dst, double thresh, double
} }
else else
{ {
CV_Assert((src.depth() != CV_64F) ||
(TargetArchs::builtWith(NATIVE_DOUBLE) && DeviceInfo().supports(NATIVE_DOUBLE)));
typedef void (*caller_t)(const GpuMat& src, GpuMat& dst, double thresh, double maxVal, int type, typedef void (*caller_t)(const GpuMat& src, GpuMat& dst, double thresh, double maxVal, int type,
cudaStream_t stream); cudaStream_t stream);

View File

@ -205,6 +205,9 @@ namespace
void cv::gpu::GpuMat::convertTo( GpuMat& dst, int rtype, double alpha, double beta ) const void cv::gpu::GpuMat::convertTo( GpuMat& dst, int rtype, double alpha, double beta ) const
{ {
CV_Assert((depth() != CV_64F && CV_MAT_DEPTH(rtype) != CV_64F) ||
(TargetArchs::builtWith(NATIVE_DOUBLE) && DeviceInfo().supports(NATIVE_DOUBLE)));
bool noScale = fabs(alpha-1) < std::numeric_limits<double>::epsilon() && fabs(beta) < std::numeric_limits<double>::epsilon(); bool noScale = fabs(alpha-1) < std::numeric_limits<double>::epsilon() && fabs(beta) < std::numeric_limits<double>::epsilon();
if( rtype < 0 ) if( rtype < 0 )
@ -428,6 +431,9 @@ GpuMat& GpuMat::setTo(const Scalar& s, const GpuMat& mask)
{ {
CV_Assert(mask.type() == CV_8UC1); CV_Assert(mask.type() == CV_8UC1);
CV_Assert((depth() != CV_64F) ||
(TargetArchs::builtWith(NATIVE_DOUBLE) && DeviceInfo().supports(NATIVE_DOUBLE)));
CV_DbgAssert(!this->empty()); CV_DbgAssert(!this->empty());
NppiSize sz; NppiSize sz;

View File

@ -393,11 +393,37 @@ namespace cv
} }
}; };
// Compile-time switch for the unary transform path.
// Primary template: value is false for every (scn, dcn) pair that the
// specialization below does not match.
template <typename T, typename D, int scn, int dcn> struct UseSmartUn_
{
static const bool value = false;
};
// Specialization for scn == 1 and dcn == 1: value is true exactly when
// device::UnReadWriteTraits<T, D>::shift differs from 1.
template <typename T, typename D> struct UseSmartUn_<T, D, 1, 1>
{
static const bool value = device::UnReadWriteTraits<T, D>::shift != 1;
};
// Front end: takes the channel counts from device::VecTraits<T>::cn and
// device::VecTraits<D>::cn and exposes the resulting UseSmartUn_ value.
template <typename T, typename D> struct UseSmartUn
{
static const bool value = UseSmartUn_<T, D, device::VecTraits<T>::cn, device::VecTraits<D>::cn>::value;
};
// Compile-time switch for the binary transform path.
// Primary template: value is false for every (src1cn, src2cn, dstcn) triple
// that the specialization below does not match.
template <typename T1, typename T2, typename D, int src1cn, int src2cn, int dstcn> struct UseSmartBin_
{
static const bool value = false;
};
// Specialization for all three channel counts equal to 1: value is true exactly
// when device::BinReadWriteTraits<T1, T2, D>::shift differs from 1.
template <typename T1, typename T2, typename D> struct UseSmartBin_<T1, T2, D, 1, 1, 1>
{
static const bool value = device::BinReadWriteTraits<T1, T2, D>::shift != 1;
};
// Front end: takes the channel counts from device::VecTraits of T1, T2 and D
// and exposes the resulting UseSmartBin_ value.
template <typename T1, typename T2, typename D> struct UseSmartBin
{
static const bool value = UseSmartBin_<T1, T2, D, device::VecTraits<T1>::cn, device::VecTraits<T2>::cn, device::VecTraits<D>::cn>::value;
};
template <typename T, typename D, typename UnOp, typename Mask> template <typename T, typename D, typename UnOp, typename Mask>
static void transform_caller(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, UnOp op, const Mask& mask, static void transform_caller(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, UnOp op, const Mask& mask,
cudaStream_t stream = 0) cudaStream_t stream = 0)
{ {
TransformDispatcher<device::VecTraits<T>::cn == 1 && device::VecTraits<D>::cn == 1 && device::UnReadWriteTraits<T, D>::shift != 1>::call(src, dst, op, mask, stream); TransformDispatcher< UseSmartUn<T, D>::value >::call(src, dst, op, mask, stream);
} }
template <typename T, typename D, typename UnOp> template <typename T, typename D, typename UnOp>
@ -416,7 +442,7 @@ namespace cv
static void transform_caller(const DevMem2D_<T1>& src1, const DevMem2D_<T2>& src2, const DevMem2D_<D>& dst, static void transform_caller(const DevMem2D_<T1>& src1, const DevMem2D_<T2>& src2, const DevMem2D_<D>& dst,
BinOp op, const Mask& mask, cudaStream_t stream = 0) BinOp op, const Mask& mask, cudaStream_t stream = 0)
{ {
TransformDispatcher<device::VecTraits<T1>::cn == 1 && device::VecTraits<T2>::cn == 1 && device::VecTraits<D>::cn == 1 && device::BinReadWriteTraits<T1, T2, D>::shift != 1>::call(src1, src2, dst, op, mask, stream); TransformDispatcher< UseSmartBin<T1, T2, D>::value >::call(src1, src2, dst, op, mask, stream);
} }
template <typename T1, typename T2, typename D, typename BinOp> template <typename T1, typename T2, typename D, typename BinOp>

View File

@ -681,3 +681,66 @@ TEST(erode)
GPU_OFF; GPU_OFF;
} }
} }
TEST(threshold)
{
    // One row per benchmarked configuration, listed in reporting order.
    struct Case
    {
        int depth;          // matrix depth passed to gen() and create()
        int threshType;     // thresholding type passed to both implementations
        const char* suffix; // tail of the subtest title
    };
    static const Case cases[] =
    {
        { CV_8U,  THRESH_TRUNC,  ", 8U, THRESH_TRUNC"  },
        { CV_8U,  THRESH_BINARY, ", 8U, THRESH_BINARY" },
        { CV_32F, THRESH_TRUNC,  ", 32F, THRESH_TRUNC" }
    };
    const int caseCount = sizeof(cases) / sizeof(cases[0]);

    Mat src, dst;
    gpu::GpuMat d_src, d_dst;

    for (int k = 0; k < caseCount; ++k)
    {
        const Case& cur = cases[k];

        // Square images with side 2000, 3000 and 4000.
        for (int step = 0; step < 3; ++step)
        {
            const int size = 2000 + 1000 * step;

            SUBTEST << "size " << size << cur.suffix;

            gen(src, size, size, cur.depth, 0, 100);
            dst.create(size, size, cur.depth);

            // Host reference timing.
            CPU_ON;
            threshold(src, dst, 50.0, 0.0, cur.threshType);
            CPU_OFF;

            d_src = src;
            d_dst.create(size, size, cur.depth);

            // Device timing on the same input.
            GPU_ON;
            gpu::threshold(d_src, d_dst, 50.0, 0.0, cur.threshType);
            GPU_OFF;
        }
    }
}

View File

@ -384,7 +384,7 @@ void CV_GpuBruteForceMatcherTest::knnMatchTest( const GpuMat& query, const GpuMa
void CV_GpuBruteForceMatcherTest::radiusMatchTest( const GpuMat& query, const GpuMat& train ) void CV_GpuBruteForceMatcherTest::radiusMatchTest( const GpuMat& query, const GpuMat& train )
{ {
bool atomics_ok = TargetArchs::builtWith(ATOMICS) && DeviceInfo().supports(ATOMICS); bool atomics_ok = TargetArchs::builtWith(GLOBAL_ATOMICS) && DeviceInfo().supports(GLOBAL_ATOMICS);
if (!atomics_ok) if (!atomics_ok)
{ {
ts->printf(CvTS::CONSOLE, "\nCode and device atomics support is required for radiusMatch (CC >= 1.1)"); ts->printf(CvTS::CONSOLE, "\nCode and device atomics support is required for radiusMatch (CC >= 1.1)");

View File

@ -53,7 +53,7 @@ struct CV_GpuMeanShiftTest : public CvTest
void run(int) void run(int)
{ {
bool cc12_ok = TargetArchs::builtWith(COMPUTE_12) && DeviceInfo().supports(COMPUTE_12); bool cc12_ok = TargetArchs::builtWith(FEATURE_SET_COMPUTE_12) && DeviceInfo().supports(FEATURE_SET_COMPUTE_12);
if (!cc12_ok) if (!cc12_ok)
{ {
ts->printf(CvTS::CONSOLE, "\nCompute capability 1.2 is required"); ts->printf(CvTS::CONSOLE, "\nCompute capability 1.2 is required");
@ -67,8 +67,8 @@ struct CV_GpuMeanShiftTest : public CvTest
cv::Mat img = cv::imread(std::string(ts->get_data_path()) + "meanshift/cones.png"); cv::Mat img = cv::imread(std::string(ts->get_data_path()) + "meanshift/cones.png");
cv::Mat img_template; cv::Mat img_template;
if (cv::gpu::TargetArchs::builtWith(cv::gpu::COMPUTE_20) && if (cv::gpu::TargetArchs::builtWith(cv::gpu::FEATURE_SET_COMPUTE_20) &&
cv::gpu::DeviceInfo().supports(cv::gpu::COMPUTE_20)) cv::gpu::DeviceInfo().supports(cv::gpu::FEATURE_SET_COMPUTE_20))
img_template = cv::imread(std::string(ts->get_data_path()) + "meanshift/con_result.png"); img_template = cv::imread(std::string(ts->get_data_path()) + "meanshift/con_result.png");
else else
img_template = cv::imread(std::string(ts->get_data_path()) + "meanshift/con_result_CC1X.png"); img_template = cv::imread(std::string(ts->get_data_path()) + "meanshift/con_result_CC1X.png");
@ -145,7 +145,7 @@ struct CV_GpuMeanShiftProcTest : public CvTest
void run(int) void run(int)
{ {
bool cc12_ok = TargetArchs::builtWith(COMPUTE_12) && DeviceInfo().supports(COMPUTE_12); bool cc12_ok = TargetArchs::builtWith(FEATURE_SET_COMPUTE_12) && DeviceInfo().supports(FEATURE_SET_COMPUTE_12);
if (!cc12_ok) if (!cc12_ok)
{ {
ts->printf(CvTS::CONSOLE, "\nCompute capability 1.2 is required"); ts->printf(CvTS::CONSOLE, "\nCompute capability 1.2 is required");
@ -219,8 +219,8 @@ struct CV_GpuMeanShiftProcTest : public CvTest
cv::Mat spmap_template; cv::Mat spmap_template;
cv::FileStorage fs; cv::FileStorage fs;
if (cv::gpu::TargetArchs::builtWith(cv::gpu::COMPUTE_20) && if (cv::gpu::TargetArchs::builtWith(cv::gpu::FEATURE_SET_COMPUTE_20) &&
cv::gpu::DeviceInfo().supports(cv::gpu::COMPUTE_20)) cv::gpu::DeviceInfo().supports(cv::gpu::FEATURE_SET_COMPUTE_20))
fs.open(std::string(ts->get_data_path()) + "meanshift/spmap.yaml", cv::FileStorage::READ); fs.open(std::string(ts->get_data_path()) + "meanshift/spmap.yaml", cv::FileStorage::READ);
else else
fs.open(std::string(ts->get_data_path()) + "meanshift/spmap_CC1X.yaml", cv::FileStorage::READ); fs.open(std::string(ts->get_data_path()) + "meanshift/spmap_CC1X.yaml", cv::FileStorage::READ);

View File

@ -54,7 +54,7 @@ struct CV_GpuMeanShiftSegmentationTest : public CvTest {
{ {
try try
{ {
bool cc12_ok = TargetArchs::builtWith(COMPUTE_12) && DeviceInfo().supports(COMPUTE_12); bool cc12_ok = TargetArchs::builtWith(FEATURE_SET_COMPUTE_12) && DeviceInfo().supports(FEATURE_SET_COMPUTE_12);
if (!cc12_ok) if (!cc12_ok)
{ {
ts->printf(CvTS::CONSOLE, "\nCompute capability 1.2 is required"); ts->printf(CvTS::CONSOLE, "\nCompute capability 1.2 is required");
@ -77,7 +77,7 @@ struct CV_GpuMeanShiftSegmentationTest : public CvTest {
{ {
stringstream path; stringstream path;
path << ts->get_data_path() << "meanshift/cones_segmented_sp10_sr10_minsize" << minsize; path << ts->get_data_path() << "meanshift/cones_segmented_sp10_sr10_minsize" << minsize;
if (TargetArchs::builtWith(COMPUTE_20) && DeviceInfo().supports(COMPUTE_20)) if (TargetArchs::builtWith(FEATURE_SET_COMPUTE_20) && DeviceInfo().supports(FEATURE_SET_COMPUTE_20))
path << ".png"; path << ".png";
else else
path << "_CC1X.png"; path << "_CC1X.png";

View File

@ -66,21 +66,24 @@ void CV_GpuMatOpConvertToTest::run(int /* start_from */)
{ {
const Size img_size(67, 35); const Size img_size(67, 35);
const int types[] = {CV_8U, CV_8S, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F};
const int types_num = sizeof(types) / sizeof(int);
const char* types_str[] = {"CV_8U", "CV_8S", "CV_16U", "CV_16S", "CV_32S", "CV_32F", "CV_64F"}; const char* types_str[] = {"CV_8U", "CV_8S", "CV_16U", "CV_16S", "CV_32S", "CV_32F", "CV_64F"};
bool passed = true; bool passed = true;
try try
{ {
for (int i = 0; i < types_num && passed; ++i) int lastType = CV_32F;
if (TargetArchs::builtWith(NATIVE_DOUBLE) && DeviceInfo().supports(NATIVE_DOUBLE))
lastType = CV_64F;
for (int i = 0; i <= lastType && passed; ++i)
{ {
for (int j = 0; j < types_num && passed; ++j) for (int j = 0; j <= lastType && passed; ++j)
{ {
for (int c = 1; c < 5 && passed; ++c) for (int c = 1; c < 5 && passed; ++c)
{ {
const int src_type = CV_MAKETYPE(types[i], c); const int src_type = CV_MAKETYPE(i, c);
const int dst_type = types[j]; const int dst_type = j;
cv::RNG rng(*ts->get_rng()); cv::RNG rng(*ts->get_rng());

View File

@ -126,7 +126,12 @@ void CV_GpuMatOpCopyToTest::run( int /* start_from */)
try try
{ {
for (int i = 0 ; i < 7; i++) int lastType = CV_32F;
if (TargetArchs::builtWith(NATIVE_DOUBLE) && DeviceInfo().supports(NATIVE_DOUBLE))
lastType = CV_64F;
for (int i = 0 ; i <= lastType; i++)
{ {
Mat cpumat(rows, cols, i); Mat cpumat(rows, cols, i);
cpumat.setTo(Scalar::all(127)); cpumat.setTo(Scalar::all(127));

View File

@ -101,7 +101,12 @@ void CV_GpuMatOpSetToTest::run( int /* start_from */)
rng.fill(cpumask, RNG::UNIFORM, cv::Scalar::all(0.0), cv::Scalar(1.5)); rng.fill(cpumask, RNG::UNIFORM, cv::Scalar::all(0.0), cv::Scalar(1.5));
cv::gpu::GpuMat gpumask(cpumask); cv::gpu::GpuMat gpumask(cpumask);
for (int i = 0; i < 7; i++) int lastType = CV_32F;
if (TargetArchs::builtWith(NATIVE_DOUBLE) && DeviceInfo().supports(NATIVE_DOUBLE))
lastType = CV_64F;
for (int i = 0; i <= lastType; i++)
{ {
for (int cn = 1; cn <= 4; ++cn) for (int cn = 1; cn <= 4; ++cn)
{ {