switched to Input/Output Array in min/max operations

This commit is contained in:
Vladislav Vinogradov 2013-04-25 15:23:44 +04:00
parent f2aa6ebe15
commit ec70282bf7
4 changed files with 86 additions and 191 deletions

View File

@ -115,6 +115,12 @@ CV_EXPORTS void rshift(InputArray src, Scalar_<int> val, OutputArray dst, Stream
//! supports 1, 3 and 4 channels images with CV_8U, CV_16U or CV_32S depth
CV_EXPORTS void lshift(InputArray src, Scalar_<int> val, OutputArray dst, Stream& stream = Stream::Null());
//! computes per-element minimum of two arrays (dst = min(src1, src2))
CV_EXPORTS void min(InputArray src1, InputArray src2, OutputArray dst, Stream& stream = Stream::Null());
//! computes per-element maximum of two arrays (dst = max(src1, src2))
CV_EXPORTS void max(InputArray src1, InputArray src2, OutputArray dst, Stream& stream = Stream::Null());
//! computes the weighted sum of two arrays (dst = alpha*src1 + beta*src2 + gamma)
CV_EXPORTS void addWeighted(const GpuMat& src1, double alpha, const GpuMat& src2, double beta, double gamma, GpuMat& dst,
int dtype = -1, Stream& stream = Stream::Null());
@ -125,18 +131,6 @@ static inline void scaleAdd(const GpuMat& src1, double alpha, const GpuMat& src2
addWeighted(src1, alpha, src2, 1.0, 0.0, dst, -1, stream);
}
//! computes per-element minimum of two arrays (dst = min(src1, src2))
CV_EXPORTS void min(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream = Stream::Null());
//! computes per-element minimum of array and scalar (dst = min(src1, src2))
CV_EXPORTS void min(const GpuMat& src1, double src2, GpuMat& dst, Stream& stream = Stream::Null());
//! computes per-element maximum of two arrays (dst = max(src1, src2))
CV_EXPORTS void max(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream = Stream::Null());
//! computes per-element maximum of array and scalar (dst = max(src1, src2))
CV_EXPORTS void max(const GpuMat& src1, double src2, GpuMat& dst, Stream& stream = Stream::Null());
//! implements generalized matrix product algorithm GEMM from BLAS
CV_EXPORTS void gemm(const GpuMat& src1, const GpuMat& src2, double alpha,
const GpuMat& src3, double beta, GpuMat& dst, int flags = 0, Stream& stream = Stream::Null());

View File

@ -48,46 +48,30 @@ using namespace cv::gpu;
#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
void cv::gpu::add(InputArray, InputArray, OutputArray, InputArray, int, Stream&) { throw_no_cuda(); }
void cv::gpu::subtract(InputArray, InputArray, OutputArray, InputArray, int, Stream&) { throw_no_cuda(); }
void cv::gpu::multiply(InputArray, InputArray, OutputArray, double, int, Stream&) { throw_no_cuda(); }
void cv::gpu::divide(InputArray, InputArray, OutputArray, double, int, Stream&) { throw_no_cuda(); }
void cv::gpu::absdiff(InputArray, InputArray, OutputArray, Stream&) { throw_no_cuda(); }
void cv::gpu::abs(InputArray, OutputArray, Stream&) { throw_no_cuda(); }
void cv::gpu::sqr(InputArray, OutputArray, Stream&) { throw_no_cuda(); }
void cv::gpu::sqrt(InputArray, OutputArray, Stream&) { throw_no_cuda(); }
void cv::gpu::exp(InputArray, OutputArray, Stream&) { throw_no_cuda(); }
void cv::gpu::log(InputArray, OutputArray, Stream&) { throw_no_cuda(); }
void cv::gpu::pow(InputArray, double, OutputArray, Stream&) { throw_no_cuda(); }
void cv::gpu::compare(InputArray, InputArray, OutputArray, int, Stream&) { throw_no_cuda(); }
void cv::gpu::bitwise_not(InputArray, OutputArray, InputArray, Stream&) { throw_no_cuda(); }
void cv::gpu::bitwise_or(InputArray, InputArray, OutputArray, InputArray, Stream&) { throw_no_cuda(); }
void cv::gpu::bitwise_and(InputArray, InputArray, OutputArray, InputArray, Stream&) { throw_no_cuda(); }
void cv::gpu::bitwise_xor(InputArray, InputArray, OutputArray, InputArray, Stream&) { throw_no_cuda(); }
void cv::gpu::rshift(InputArray, Scalar_<int>, OutputArray, Stream&) { throw_no_cuda(); }
void cv::gpu::lshift(InputArray, Scalar_<int>, OutputArray, Stream&) { throw_no_cuda(); }
void cv::gpu::min(const GpuMat&, const GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
void cv::gpu::min(const GpuMat&, double, GpuMat&, Stream&) { throw_no_cuda(); }
void cv::gpu::max(const GpuMat&, const GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
void cv::gpu::max(const GpuMat&, double, GpuMat&, Stream&) { throw_no_cuda(); }
void cv::gpu::min(InputArray, InputArray, OutputArray, Stream&) { throw_no_cuda(); }
void cv::gpu::max(InputArray, InputArray, OutputArray, Stream&) { throw_no_cuda(); }
void cv::gpu::addWeighted(const GpuMat&, double, const GpuMat&, double, double, GpuMat&, int, Stream&) { throw_no_cuda(); }
@ -2262,6 +2246,15 @@ void cv::gpu::lshift(InputArray _src, Scalar_<int> val, OutputArray _dst, Stream
//////////////////////////////////////////////////////////////////////////////
// Minimum and maximum operations
namespace
{
enum
{
MIN_OP,
MAX_OP
};
}
namespace arithm
{
void minMat_v4(PtrStepSz<unsigned int> src1, PtrStepSz<unsigned int> src2, PtrStepSz<unsigned int> dst, cudaStream_t stream);
@ -2275,12 +2268,13 @@ namespace arithm
template <typename T> void maxScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream);
}
void cv::gpu::min(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& s)
void minMaxMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat&, double, Stream& _stream, int op)
{
using namespace arithm;
typedef void (*func_t)(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
static const func_t funcs[] =
static const func_t funcs[2][7] =
{
{
minMat<unsigned char>,
minMat<signed char>,
@ -2289,77 +2283,7 @@ void cv::gpu::min(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& s
minMat<int>,
minMat<float>,
minMat<double>
};
const int depth = src1.depth();
const int cn = src1.channels();
CV_Assert( depth <= CV_64F );
CV_Assert( src2.type() == src1.type() && src2.size() == src1.size() );
if (depth == CV_64F)
{
if (!deviceSupports(NATIVE_DOUBLE))
CV_Error(cv::Error::StsUnsupportedFormat, "The device doesn't support double");
}
dst.create(src1.size(), src1.type());
cudaStream_t stream = StreamAccessor::getStream(s);
PtrStepSzb src1_(src1.rows, src1.cols * cn, src1.data, src1.step);
PtrStepSzb src2_(src1.rows, src1.cols * cn, src2.data, src2.step);
PtrStepSzb dst_(src1.rows, src1.cols * cn, dst.data, dst.step);
if (depth == CV_8U || depth == CV_16U)
{
const intptr_t src1ptr = reinterpret_cast<intptr_t>(src1_.data);
const intptr_t src2ptr = reinterpret_cast<intptr_t>(src2_.data);
const intptr_t dstptr = reinterpret_cast<intptr_t>(dst_.data);
const bool isAllAligned = (src1ptr & 31) == 0 && (src2ptr & 31) == 0 && (dstptr & 31) == 0;
if (isAllAligned)
{
if (depth == CV_8U && (src1_.cols & 3) == 0)
{
const int vcols = src1_.cols >> 2;
minMat_v4(PtrStepSz<unsigned int>(src1_.rows, vcols, (unsigned int*) src1_.data, src1_.step),
PtrStepSz<unsigned int>(src1_.rows, vcols, (unsigned int*) src2_.data, src2_.step),
PtrStepSz<unsigned int>(src1_.rows, vcols, (unsigned int*) dst_.data, dst_.step),
stream);
return;
}
else if (depth == CV_16U && (src1_.cols & 1) == 0)
{
const int vcols = src1_.cols >> 1;
minMat_v2(PtrStepSz<unsigned int>(src1_.rows, vcols, (unsigned int*) src1_.data, src1_.step),
PtrStepSz<unsigned int>(src1_.rows, vcols, (unsigned int*) src2_.data, src2_.step),
PtrStepSz<unsigned int>(src1_.rows, vcols, (unsigned int*) dst_.data, dst_.step),
stream);
return;
}
}
}
const func_t func = funcs[depth];
if (!func)
CV_Error(cv::Error::StsUnsupportedFormat, "Unsupported combination of source and destination types");
func(src1_, src2_, dst_, stream);
}
void cv::gpu::max(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& s)
{
using namespace arithm;
typedef void (*func_t)(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
static const func_t funcs[] =
},
{
maxMat<unsigned char>,
maxMat<signed char>,
@ -2368,23 +2292,25 @@ void cv::gpu::max(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& s
maxMat<int>,
maxMat<float>,
maxMat<double>
}
};
typedef void (*opt_func_t)(PtrStepSz<unsigned int> src1, PtrStepSz<unsigned int> src2, PtrStepSz<unsigned int> dst, cudaStream_t stream);
static const opt_func_t funcs_v4[2] =
{
minMat_v4, maxMat_v4
};
static const opt_func_t funcs_v2[2] =
{
minMat_v2, maxMat_v2
};
const int depth = src1.depth();
const int cn = src1.channels();
CV_Assert( depth <= CV_64F );
CV_Assert( src2.type() == src1.type() && src2.size() == src1.size() );
if (depth == CV_64F)
{
if (!deviceSupports(NATIVE_DOUBLE))
CV_Error(cv::Error::StsUnsupportedFormat, "The device doesn't support double");
}
dst.create(src1.size(), src1.type());
cudaStream_t stream = StreamAccessor::getStream(s);
cudaStream_t stream = StreamAccessor::getStream(_stream);
PtrStepSzb src1_(src1.rows, src1.cols * cn, src1.data, src1.step);
PtrStepSzb src2_(src1.rows, src1.cols * cn, src2.data, src2.step);
@ -2404,7 +2330,7 @@ void cv::gpu::max(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& s
{
const int vcols = src1_.cols >> 2;
maxMat_v4(PtrStepSz<unsigned int>(src1_.rows, vcols, (unsigned int*) src1_.data, src1_.step),
funcs_v4[op](PtrStepSz<unsigned int>(src1_.rows, vcols, (unsigned int*) src1_.data, src1_.step),
PtrStepSz<unsigned int>(src1_.rows, vcols, (unsigned int*) src2_.data, src2_.step),
PtrStepSz<unsigned int>(src1_.rows, vcols, (unsigned int*) dst_.data, dst_.step),
stream);
@ -2415,7 +2341,7 @@ void cv::gpu::max(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& s
{
const int vcols = src1_.cols >> 1;
maxMat_v2(PtrStepSz<unsigned int>(src1_.rows, vcols, (unsigned int*) src1_.data, src1_.step),
funcs_v2[op](PtrStepSz<unsigned int>(src1_.rows, vcols, (unsigned int*) src1_.data, src1_.step),
PtrStepSz<unsigned int>(src1_.rows, vcols, (unsigned int*) src2_.data, src2_.step),
PtrStepSz<unsigned int>(src1_.rows, vcols, (unsigned int*) dst_.data, dst_.step),
stream);
@ -2425,7 +2351,7 @@ void cv::gpu::max(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& s
}
}
const func_t func = funcs[depth];
const func_t func = funcs[op][depth];
if (!func)
CV_Error(cv::Error::StsUnsupportedFormat, "Unsupported combination of source and destination types");
@ -2441,12 +2367,13 @@ namespace
}
}
void cv::gpu::min(const GpuMat& src, double val, GpuMat& dst, Stream& stream)
void minMaxScalar(const GpuMat& src, Scalar val, bool, GpuMat& dst, const GpuMat&, double, Stream& stream, int op)
{
using namespace arithm;
typedef void (*func_t)(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream);
static const func_t funcs[] =
static const func_t funcs[2][7] =
{
{
minScalar<unsigned char>,
minScalar<signed char>,
@ -2455,36 +2382,7 @@ void cv::gpu::min(const GpuMat& src, double val, GpuMat& dst, Stream& stream)
minScalar<int>,
minScalar<float>,
minScalar<double>
};
typedef double (*cast_func_t)(double sc);
static const cast_func_t cast_func[] =
{
castScalar<unsigned char>, castScalar<signed char>, castScalar<unsigned short>, castScalar<short>, castScalar<int>, castScalar<float>, castScalar<double>
};
const int depth = src.depth();
CV_Assert( depth <= CV_64F );
CV_Assert( src.channels() == 1 );
if (depth == CV_64F)
{
if (!deviceSupports(NATIVE_DOUBLE))
CV_Error(cv::Error::StsUnsupportedFormat, "The device doesn't support double");
}
dst.create(src.size(), src.type());
funcs[depth](src, cast_func[depth](val), dst, StreamAccessor::getStream(stream));
}
void cv::gpu::max(const GpuMat& src, double val, GpuMat& dst, Stream& stream)
{
using namespace arithm;
typedef void (*func_t)(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream);
static const func_t funcs[] =
},
{
maxScalar<unsigned char>,
maxScalar<signed char>,
@ -2493,6 +2391,7 @@ void cv::gpu::max(const GpuMat& src, double val, GpuMat& dst, Stream& stream)
maxScalar<int>,
maxScalar<float>,
maxScalar<double>
}
};
typedef double (*cast_func_t)(double sc);
@ -2506,15 +2405,17 @@ void cv::gpu::max(const GpuMat& src, double val, GpuMat& dst, Stream& stream)
CV_Assert( depth <= CV_64F );
CV_Assert( src.channels() == 1 );
if (depth == CV_64F)
{
if (!deviceSupports(NATIVE_DOUBLE))
CV_Error(cv::Error::StsUnsupportedFormat, "The device doesn't support double");
}
funcs[op][depth](src, cast_func[depth](val[0]), dst, StreamAccessor::getStream(stream));
}
dst.create(src.size(), src.type());
void cv::gpu::min(InputArray src1, InputArray src2, OutputArray dst, Stream& stream)
{
arithm_op(src1, src2, dst, noArray(), 1.0, -1, stream, minMaxMat, minMaxScalar, MIN_OP);
}
funcs[depth](src, cast_func[depth](val), dst, StreamAccessor::getStream(stream));
void cv::gpu::max(InputArray src1, InputArray src2, OutputArray dst, Stream& stream)
{
arithm_op(src1, src2, dst, noArray(), 1.0, -1, stream, minMaxMat, minMaxScalar, MAX_OP);
}
////////////////////////////////////////////////////////////////////////

View File

@ -761,7 +761,7 @@ namespace
{
buildRTable_gpu(edgePointList.ptr<unsigned int>(0), edgePointList.ptr<float>(1), edgePointList.cols,
r_table, r_sizes.ptr<int>(), make_short2(templCenter.x, templCenter.y), levels);
min(r_sizes, maxSize, r_sizes);
gpu::min(r_sizes, maxSize, r_sizes);
}
}

View File

@ -147,7 +147,7 @@ namespace
if (use_mask)
{
min(mask, 1.0, surf_.mask1);
gpu::min(mask, 1.0, surf_.mask1);
gpu::integralBuffered(surf_.mask1, surf_.maskSum, surf_.intBuffer);
maskOffset = bindMaskSumTex(surf_.maskSum);
}